From 68227c03cba84a24faf8a7277d2b1a03c8959c2c Mon Sep 17 00:00:00 2001
From: Mateusz Jurczyk <mjurczyk@google.com>
Date: Wed, 7 Jun 2017 12:26:49 +0200
Subject: [PATCH 001/281] fuse: initialize the flock flag in fuse_file on
 allocation

Before the patch, the flock flag could remain uninitialized for the
lifespan of the fuse_file allocation. Unless set to true in
fuse_file_flock(), it would remain in an indeterminate state until read in
an if statement in fuse_release_common(). This could consequently lead to
taking an unexpected branch in the code.

The bug was discovered by a runtime instrumentation designed to detect use
of uninitialized memory in the kernel.

Signed-off-by: Mateusz Jurczyk <mjurczyk@google.com>
Fixes: 37fb3a30b462 ("fuse: fix flock")
Cc: <stable@vger.kernel.org> # v3.1+
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3ee4fdc3da9e..76eac2a554c4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 {
 	struct fuse_file *ff;
 
-	ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+	ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
 	if (unlikely(!ff))
 		return NULL;
 

From a3507e48d3f99a93a3056a34a5365f310434570f Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 21 Jun 2017 01:46:37 +0900
Subject: [PATCH 002/281] iio: light: tsl2563: use correct event code

The TSL2563 driver provides three iio channels, two of which are raw ADC
channels (channel 0 and channel 1) in the device and the remaining one
is calculated by the two.  The ADC channel 0 only supports programmable
interrupt with threshold settings and this driver supports the event but
the generated event code does not contain the corresponding iio channel
type.

This is going to change userspace ABI.  Hopefully fixing this to be
what it should always have been won't break any userspace code.

Cc: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/light/tsl2563.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c
index e7d4ea75e007..7599693f7fe9 100644
--- a/drivers/iio/light/tsl2563.c
+++ b/drivers/iio/light/tsl2563.c
@@ -626,7 +626,7 @@ static irqreturn_t tsl2563_event_handler(int irq, void *private)
 	struct tsl2563_chip *chip = iio_priv(dev_info);
 
 	iio_push_event(dev_info,
-		       IIO_UNMOD_EVENT_CODE(IIO_LIGHT,
+		       IIO_UNMOD_EVENT_CODE(IIO_INTENSITY,
 					    0,
 					    IIO_EV_TYPE_THRESH,
 					    IIO_EV_DIR_EITHER),

From add6e6ab3ee0e237822e0951476d3df039b49ec8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Thu, 22 Jun 2017 19:46:43 +0200
Subject: [PATCH 003/281] iio: pressure: st_pressure_core: disable multiread by
 default for LPS22HB

Set multiread variable to false for LPS22HB pressure sensor since
it is already enabled in CTRL_REG2. Previous configuration does not
cause any issue in I2C communication since SUB Msb has no meaning
whereas it breaks register address in SPI communication

Fixes: e039e2f5b4da (iio:st_pressure:initial lps22hb sensor support)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/pressure/st_pressure_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index fd0edca0e656..99448012b47f 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -456,7 +456,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.mask_od = 0x40,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
-		.multi_read_bit = true,
+		.multi_read_bit = false,
 		.bootime = 2,
 	},
 };

From be2ea53330f1865ac5b00377f2074f8e255bf74c Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Mon, 3 Jul 2017 15:09:26 +0200
Subject: [PATCH 004/281] iio: adc: sun4i-gpadc-iio: fix unbalanced irq
 enable/disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When initializing interrupts, the devm_request_any_context_irq will
enable them right away. An atomic flag was set in sun4i_irq_init and read
in the interrupt handler to make sure no unwanted interrupts were
handled. If an unwanted interrupt occurred, the handler would disable
the irq and return IRQ_HANDLED. However, at the end of sun4i_irq_init,
the irq would be disabled as well, resulting in an unbalanced enable
(since there are more disables than enables, the code enabling the
interrupt would never be called).

When reading the ADC or the temperature, the respective irq would be
enabled in the read function and disabled in the irq handler. In the
read function, we would wait for a completion (with a timeout) that will
be set in the irq handler. However, if the completion is never set or if
the wait for completion times out, the irq would not be disabled in the
read function resulting in an unbalanced enable once the read function
is called again (since there are 2+ enables for no disable).

Moving disable_irq from the irq handler to the read function get rid of
these two cases of unbalanced enable.

Fixes: d1caa9905538 ("iio: adc: add support for Allwinner SoCs ADC")

Reported-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/sun4i-gpadc-iio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c
index 81d4c39e414a..137f577d9432 100644
--- a/drivers/iio/adc/sun4i-gpadc-iio.c
+++ b/drivers/iio/adc/sun4i-gpadc-iio.c
@@ -256,6 +256,7 @@ static int sun4i_gpadc_read(struct iio_dev *indio_dev, int channel, int *val,
 
 err:
 	pm_runtime_put_autosuspend(indio_dev->dev.parent);
+	disable_irq(irq);
 	mutex_unlock(&info->mutex);
 
 	return ret;
@@ -365,7 +366,6 @@ static irqreturn_t sun4i_gpadc_temp_data_irq_handler(int irq, void *dev_id)
 		complete(&info->completion);
 
 out:
-	disable_irq_nosync(info->temp_data_irq);
 	return IRQ_HANDLED;
 }
 
@@ -380,7 +380,6 @@ static irqreturn_t sun4i_gpadc_fifo_data_irq_handler(int irq, void *dev_id)
 		complete(&info->completion);
 
 out:
-	disable_irq_nosync(info->fifo_data_irq);
 	return IRQ_HANDLED;
 }
 

From 631b010abc5b57009c6a8328f51492665f6ef310 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 30 Jun 2017 19:42:54 +0200
Subject: [PATCH 005/281] iio: adc: Revert "axp288: Drop bogus
 AXP288_ADC_TS_PIN_CTRL register modifications"

Inheriting the ADC BIAS current settings from the BIOS instead of
hardcoding then causes the AXP288 to disable charging (I think it
mis-detects an overheated battery) on at least one model tablet.

So lets go back to hard coding the values, this reverts
commit fa2849e9649b ("iio: adc: axp288: Drop bogus
AXP288_ADC_TS_PIN_CTRL register modifications"), fixing charging not
working on the model tablet in question.

The exact cause is not fully understood, hence the revert to a known working
state.

Cc: stable@vger.kernel.org
Reported-by: Umberto Ixxo <sfumato1977@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/axp288_adc.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c
index 64799ad7ebad..7fd24949c0c1 100644
--- a/drivers/iio/adc/axp288_adc.c
+++ b/drivers/iio/adc/axp288_adc.c
@@ -28,6 +28,8 @@
 #include <linux/iio/driver.h>
 
 #define AXP288_ADC_EN_MASK		0xF1
+#define AXP288_ADC_TS_PIN_GPADC		0xF2
+#define AXP288_ADC_TS_PIN_ON		0xF3
 
 enum axp288_adc_id {
 	AXP288_ADC_TS,
@@ -121,6 +123,16 @@ static int axp288_adc_read_channel(int *val, unsigned long address,
 	return IIO_VAL_INT;
 }
 
+static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode,
+				unsigned long address)
+{
+	/* channels other than GPADC do not need to switch TS pin */
+	if (address != AXP288_GP_ADC_H)
+		return 0;
+
+	return regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode);
+}
+
 static int axp288_adc_read_raw(struct iio_dev *indio_dev,
 			struct iio_chan_spec const *chan,
 			int *val, int *val2, long mask)
@@ -131,7 +143,16 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
 	mutex_lock(&indio_dev->mlock);
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
+		if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC,
+					chan->address)) {
+			dev_err(&indio_dev->dev, "GPADC mode\n");
+			ret = -EINVAL;
+			break;
+		}
 		ret = axp288_adc_read_channel(val, chan->address, info->regmap);
+		if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON,
+						chan->address))
+			dev_err(&indio_dev->dev, "TS pin restore\n");
 		break;
 	default:
 		ret = -EINVAL;
@@ -141,6 +162,15 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
 	return ret;
 }
 
+static int axp288_adc_set_state(struct regmap *regmap)
+{
+	/* ADC should be always enabled for internal FG to function */
+	if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON))
+		return -EIO;
+
+	return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK);
+}
+
 static const struct iio_info axp288_adc_iio_info = {
 	.read_raw = &axp288_adc_read_raw,
 	.driver_module = THIS_MODULE,
@@ -169,7 +199,7 @@ static int axp288_adc_probe(struct platform_device *pdev)
 	 * Set ADC to enabled state at all time, including system suspend.
 	 * otherwise internal fuel gauge functionality may be affected.
 	 */
-	ret = regmap_write(info->regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK);
+	ret = axp288_adc_set_state(axp20x->regmap);
 	if (ret) {
 		dev_err(&pdev->dev, "unable to enable ADC device\n");
 		return ret;

From a7b8829d242b1a58107e9c02b09e93aec446d55c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Wed, 5 Jul 2017 20:30:01 +0200
Subject: [PATCH 006/281] iio: accel: st_accel: add SPI-3wire support

Add SPI Serial Interface Mode (SIM) register information
in st_sensor_settings look up table to support devices
(like LSM303AGR accel sensor) that allow just SPI-3wire
communication mode. SIM mode has to be configured before any
other operation since it is not enabled by default and the driver
is not able to read without that configuration

Whilst a fairly substantial patch, the actual logic is simple and it
is better to have the generic fix than a band aid.

Fixes: ddc05fa28606 (iio: st-accel: add support for lsm303agr accel)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c             | 32 +++++++++++++++++++
 .../iio/common/st_sensors/st_sensors_core.c   | 29 +++++++++++++++++
 include/linux/iio/common/st_sensors.h         |  7 ++++
 .../linux/platform_data/st_sensors_pdata.h    |  2 ++
 4 files changed, 70 insertions(+)

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 784670e2736b..2ee3ae11eb2a 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -166,6 +166,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_ihl = 0x02,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x23,
+			.value = BIT(0),
+		},
 		.multi_read_bit = true,
 		.bootime = 2,
 	},
@@ -234,6 +238,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_od = 0x40,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x23,
+			.value = BIT(0),
+		},
 		.multi_read_bit = true,
 		.bootime = 2,
 	},
@@ -316,6 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 				.en_mask = 0x08,
 			},
 		},
+		.sim = {
+			.addr = 0x24,
+			.value = BIT(0),
+		},
 		.multi_read_bit = false,
 		.bootime = 2,
 	},
@@ -379,6 +391,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_int1 = 0x04,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x21,
+			.value = BIT(1),
+		},
 		.multi_read_bit = true,
 		.bootime = 2, /* guess */
 	},
@@ -437,6 +453,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_od = 0x40,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x21,
+			.value = BIT(7),
+		},
 		.multi_read_bit = false,
 		.bootime = 2, /* guess */
 	},
@@ -499,6 +519,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.addr_ihl = 0x22,
 			.mask_ihl = 0x80,
 		},
+		.sim = {
+			.addr = 0x23,
+			.value = BIT(0),
+		},
 		.multi_read_bit = true,
 		.bootime = 2,
 	},
@@ -547,6 +571,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_int1 = 0x04,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x21,
+			.value = BIT(1),
+		},
 		.multi_read_bit = false,
 		.bootime = 2,
 	},
@@ -614,6 +642,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
 			.mask_ihl = 0x02,
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
+		.sim = {
+			.addr = 0x23,
+			.value = BIT(0),
+		},
 		.multi_read_bit = true,
 		.bootime = 2,
 	},
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 79c8c7cd70d5..6e6a1ecc99dd 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -550,6 +550,31 @@ out:
 }
 EXPORT_SYMBOL(st_sensors_read_info_raw);
 
+static int st_sensors_init_interface_mode(struct iio_dev *indio_dev,
+			const struct st_sensor_settings *sensor_settings)
+{
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	struct device_node *np = sdata->dev->of_node;
+	struct st_sensors_platform_data *pdata;
+
+	pdata = (struct st_sensors_platform_data *)sdata->dev->platform_data;
+	if (((np && of_property_read_bool(np, "spi-3wire")) ||
+	     (pdata && pdata->spi_3wire)) && sensor_settings->sim.addr) {
+		int err;
+
+		err = sdata->tf->write_byte(&sdata->tb, sdata->dev,
+					    sensor_settings->sim.addr,
+					    sensor_settings->sim.value);
+		if (err < 0) {
+			dev_err(&indio_dev->dev,
+				"failed to init interface mode\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 int st_sensors_check_device_support(struct iio_dev *indio_dev,
 			int num_sensors_list,
 			const struct st_sensor_settings *sensor_settings)
@@ -574,6 +599,10 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev,
 		return -ENODEV;
 	}
 
+	err = st_sensors_init_interface_mode(indio_dev, &sensor_settings[i]);
+	if (err < 0)
+		return err;
+
 	if (sensor_settings[i].wai_addr) {
 		err = sdata->tf->read_byte(&sdata->tb, sdata->dev,
 					   sensor_settings[i].wai_addr, &wai);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 497f2b3a5a62..97f1b465d04f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -105,6 +105,11 @@ struct st_sensor_fullscale {
 	struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX];
 };
 
+struct st_sensor_sim {
+	u8 addr;
+	u8 value;
+};
+
 /**
  * struct st_sensor_bdu - ST sensor device block data update
  * @addr: address of the register.
@@ -197,6 +202,7 @@ struct st_sensor_transfer_function {
  * @bdu: Block data update register.
  * @das: Data Alignment Selection register.
  * @drdy_irq: Data ready register of the sensor.
+ * @sim: SPI serial interface mode register of the sensor.
  * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read.
  * @bootime: samples to discard when sensor passing from power-down to power-up.
  */
@@ -213,6 +219,7 @@ struct st_sensor_settings {
 	struct st_sensor_bdu bdu;
 	struct st_sensor_das das;
 	struct st_sensor_data_ready_irq drdy_irq;
+	struct st_sensor_sim sim;
 	bool multi_read_bit;
 	unsigned int bootime;
 };
diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h
index 79b0e4cdb814..f8274b0c6888 100644
--- a/include/linux/platform_data/st_sensors_pdata.h
+++ b/include/linux/platform_data/st_sensors_pdata.h
@@ -17,10 +17,12 @@
  *	Available only for accelerometer and pressure sensors.
  *	Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet).
  * @open_drain: set the interrupt line to be open drain if possible.
+ * @spi_3wire: enable spi-3wire mode.
  */
 struct st_sensors_platform_data {
 	u8 drdy_int_pin;
 	bool open_drain;
+	bool spi_3wire;
 };
 
 #endif /* ST_SENSORS_PDATA_H */

From d466d3c1217406b14b834335b5b4b33c0d45bd09 Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Mirea <stefan-gabriel.mirea@nxp.com>
Date: Thu, 6 Jul 2017 10:06:41 +0100
Subject: [PATCH 007/281] iio: adc: vf610_adc: Fix VALT selection value for
 REFSEL bits

In order to select the alternate voltage reference pair (VALTH/VALTL), the
right value for the REFSEL field in the ADCx_CFG register is "01", leading
to 0x800 as register mask. See section 8.2.6.4 in the reference manual[1].

[1] http://www.nxp.com/docs/en/reference-manual/VFXXXRM.pdf

Fixes: a775427632fd ("iio:adc:imx: add Freescale Vybrid vf610 adc driver")
Signed-off-by: Stefan-Gabriel Mirea <stefan-gabriel.mirea@nxp.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/vf610_adc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
index 01fc76f7d660..c168e0db329a 100644
--- a/drivers/iio/adc/vf610_adc.c
+++ b/drivers/iio/adc/vf610_adc.c
@@ -77,7 +77,7 @@
 #define VF610_ADC_ADSTS_MASK		0x300
 #define VF610_ADC_ADLPC_EN		0x80
 #define VF610_ADC_ADHSC_EN		0x400
-#define VF610_ADC_REFSEL_VALT		0x100
+#define VF610_ADC_REFSEL_VALT		0x800
 #define VF610_ADC_REFSEL_VBG		0x1000
 #define VF610_ADC_ADTRG_HARD		0x2000
 #define VF610_ADC_AVGS_8		0x4000

From 3091141d7803608950adc001518ffb544a4e3308 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 8 Jul 2017 15:11:57 +0200
Subject: [PATCH 008/281] iio: adc: axp288: Fix the GPADC pin reading often
 wrongly returning 0

I noticed in its DSDT that one of my tablets actually is using the GPADC
pin for temperature monitoring.

The whole axp288_adc_set_ts() function is a bit weird, in the past it was
removed because it seems to make no sense, then this was reverted because
of regressions.

So I decided to test the special GPADC pin handling on this tablet.
Conclusion: not only is axp288_adc_set_ts() necessary, we need to sleep a
bit after making the AXP288_ADC_TS_PIN_CTRL changes before sampling the
GPADC, otherwise it will often (about 80% of the time) read 0 instead of
its actual value.

It seems that there is only 1 bias current source and to be able to use it
for the GPIO0 pin in GPADC mode it must be temporarily turned off for the
TS pin, but the datasheet does not mention this.

This commit adds a sleep after disabling the TS pin bias current,
fixing the GPADC more often then not wrongly returning 0.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/axp288_adc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c
index 7fd24949c0c1..462a99c13e7a 100644
--- a/drivers/iio/adc/axp288_adc.c
+++ b/drivers/iio/adc/axp288_adc.c
@@ -126,11 +126,21 @@ static int axp288_adc_read_channel(int *val, unsigned long address,
 static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode,
 				unsigned long address)
 {
+	int ret;
+
 	/* channels other than GPADC do not need to switch TS pin */
 	if (address != AXP288_GP_ADC_H)
 		return 0;
 
-	return regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode);
+	ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode);
+	if (ret)
+		return ret;
+
+	/* When switching to the GPADC pin give things some time to settle */
+	if (mode == AXP288_ADC_TS_PIN_GPADC)
+		usleep_range(6000, 10000);
+
+	return 0;
 }
 
 static int axp288_adc_read_raw(struct iio_dev *indio_dev,

From 105967ad68d2eb1a041bc041f9cf96af2a653b65 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 Jul 2017 11:31:03 +0200
Subject: [PATCH 009/281] staging:iio:resolver:ad2s1210 fix negative
 IIO_ANGL_VEL read

gcc-7 points out an older regression:

drivers/staging/iio/resolver/ad2s1210.c: In function 'ad2s1210_read_raw':
drivers/staging/iio/resolver/ad2s1210.c:515:42: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context]

The original code had 'unsigned short' here, but incorrectly got
converted to 'bool'. This reverts the regression and uses a normal
type instead.

Fixes: 29148543c521 ("staging:iio:resolver:ad2s1210 minimal chan spec conversion.")
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/resolver/ad2s1210.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c
index a6a8393d6664..3e00df74b18c 100644
--- a/drivers/staging/iio/resolver/ad2s1210.c
+++ b/drivers/staging/iio/resolver/ad2s1210.c
@@ -472,7 +472,7 @@ static int ad2s1210_read_raw(struct iio_dev *indio_dev,
 			     long m)
 {
 	struct ad2s1210_state *st = iio_priv(indio_dev);
-	bool negative;
+	u16 negative;
 	int ret = 0;
 	u16 pos;
 	s16 vel;

From d1510a2e5ab6cb3a67f1c55ca5e7a6d2c6dec340 Mon Sep 17 00:00:00 2001
From: Chris Gorman <chrisjohgorman@gmail.com>
Date: Wed, 12 Jul 2017 13:31:26 -0400
Subject: [PATCH 010/281] i2c: mux: pinctrl: mention correct module name in
 Kconfig help text

Kconfig says the resulting module is pinctrl-i2cmux, but the module when
built is i2c-mux-pinctrl.

Fixes: ae58d1e40698 ("i2c: Add generic I2C multiplexer using pinctrl API")
Signed-off-by: Chris Gorman <chrisjohgorman@gmail.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
---
 drivers/i2c/muxes/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index 2c64d0e0740f..17121329bb79 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -83,7 +83,7 @@ config I2C_MUX_PINCTRL
 	  different sets of pins at run-time.
 
 	  This driver can also be built as a module. If so, the module will be
-	  called pinctrl-i2cmux.
+	  called i2c-mux-pinctrl.
 
 config I2C_MUX_REG
 	tristate "Register-based I2C multiplexer"

From 9585e340db9f6cc1c0928d82c3a23cc4460f0a3f Mon Sep 17 00:00:00 2001
From: Stefan Triller <github@stefantriller.de>
Date: Fri, 30 Jun 2017 14:44:03 +0200
Subject: [PATCH 011/281] USB: serial: cp210x: add support for Qivicon USB
 ZigBee dongle

The German Telekom offers a ZigBee USB Stick under the brand name Qivicon
for their SmartHome Home Base in its 1. Generation. The productId is not
known by the according kernel module, this patch adds support for it.

Signed-off-by: Stefan Triller <github@stefantriller.de>
Reviewed-by: Frans Klaver <fransklaver@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index f64e914a8985..2d945c9f975c 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -142,6 +142,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
+	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */

From e59e18989c68a8d7941005f81ad6abc4ca682de0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 13 Jul 2017 15:13:41 +0200
Subject: [PATCH 012/281] iio: accel: bmc150: Always restore device to normal
 mode after suspend-resume

After probe we would put the device in normal mode, after a runtime
suspend-resume we would put it back in normal mode. But for a regular
suspend-resume we would only put it back in normal mode if triggers
or events have been requested.  This is not consistent and breaks
reading raw values after a suspend-resume.

This commit changes the regular resume path to also unconditionally put
the device back in normal mode, fixing reading of raw values not working
after a regular suspend-resume cycle.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bmc150-accel-core.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index 6b5d3be283c4..807299dd45eb 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -193,7 +193,6 @@ struct bmc150_accel_data {
 	struct regmap *regmap;
 	int irq;
 	struct bmc150_accel_interrupt interrupts[BMC150_ACCEL_INTERRUPTS];
-	atomic_t active_intr;
 	struct bmc150_accel_trigger triggers[BMC150_ACCEL_TRIGGERS];
 	struct mutex mutex;
 	u8 fifo_mode, watermark;
@@ -493,11 +492,6 @@ static int bmc150_accel_set_interrupt(struct bmc150_accel_data *data, int i,
 		goto out_fix_power_state;
 	}
 
-	if (state)
-		atomic_inc(&data->active_intr);
-	else
-		atomic_dec(&data->active_intr);
-
 	return 0;
 
 out_fix_power_state:
@@ -1710,8 +1704,7 @@ static int bmc150_accel_resume(struct device *dev)
 	struct bmc150_accel_data *data = iio_priv(indio_dev);
 
 	mutex_lock(&data->mutex);
-	if (atomic_read(&data->active_intr))
-		bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0);
+	bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0);
 	bmc150_accel_fifo_set_mode(data);
 	mutex_unlock(&data->mutex);
 

From 3aa0907675a38498d8f2d343e94207ad28a117cf Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 17 Jul 2017 20:20:08 +0200
Subject: [PATCH 013/281] mtd: nand: atmel: Fix DT backward compatibility in
 pmecc.c

PMECC caps extraction from old DT bindings is broken, thus leading to
erroneous EL registers offset, which in turn make HW ECC unusable on
sama5d2 when old bindings are in use.

Passing the NAND dev node instead of the NFC node to of_match_node()
solves the problem.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
Cc: <stable@vger.kernel.org>
Tested-by: Romain Izard <romain.izard.pro@gmail.com>
---
 drivers/mtd/nand/atmel/pmecc.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c
index 55a8ee5306ea..8c210a5776bc 100644
--- a/drivers/mtd/nand/atmel/pmecc.c
+++ b/drivers/mtd/nand/atmel/pmecc.c
@@ -945,6 +945,7 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
 		 */
 		struct platform_device *pdev = to_platform_device(userdev);
 		const struct atmel_pmecc_caps *caps;
+		const struct of_device_id *match;
 
 		/* No PMECC engine available. */
 		if (!of_property_read_bool(userdev->of_node,
@@ -953,21 +954,11 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev)
 
 		caps = &at91sam9g45_caps;
 
-		/*
-		 * Try to find the NFC subnode and extract the associated caps
-		 * from there.
-		 */
-		np = of_find_compatible_node(userdev->of_node, NULL,
-					     "atmel,sama5d3-nfc");
-		if (np) {
-			const struct of_device_id *match;
-
-			match = of_match_node(atmel_pmecc_legacy_match, np);
-			if (match && match->data)
-				caps = match->data;
-
-			of_node_put(np);
-		}
+		/* Find the caps associated to the NAND dev node. */
+		match = of_match_node(atmel_pmecc_legacy_match,
+				      userdev->of_node);
+		if (match && match->data)
+			caps = match->data;
 
 		pmecc = atmel_pmecc_create(pdev, caps, 1, 2);
 	}

From b82d6cb41eb54bf1c0e5d2ae73f49a4dff54c54b Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 21 Jul 2017 10:35:31 +0200
Subject: [PATCH 014/281] xtensa: remove wrapper header for asm/device.h

xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and
does not add any arch specific extensions. Thus, use the asm-generic
header directly.

Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/Kbuild   |  1 +
 arch/xtensa/include/asm/device.h | 15 ---------------
 2 files changed, 1 insertion(+), 15 deletions(-)
 delete mode 100644 arch/xtensa/include/asm/device.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 2d716ebc5a5e..a2fdabfce43a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,5 +1,6 @@
 generic-y += bug.h
 generic-y += clkdev.h
+generic-y += device.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h
deleted file mode 100644
index 1deeb8ebbb1b..000000000000
--- a/arch/xtensa/include/asm/device.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#ifndef _ASM_XTENSA_DEVICE_H
-#define _ASM_XTENSA_DEVICE_H
-
-struct dev_archdata {
-};
-
-struct pdev_archdata {
-};
-
-#endif /* _ASM_XTENSA_DEVICE_H */

From 536dcc9c34035778892a7e3cbf45166ce73f8d34 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 21 Jul 2017 10:44:07 +0200
Subject: [PATCH 015/281] xtensa: remove wrapper header for asm/param.h

xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and
does not add any arch specific extensions. Thus, use the asm-generic
header directly.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/Kbuild  |  1 +
 arch/xtensa/include/asm/param.h | 18 ------------------
 2 files changed, 1 insertion(+), 18 deletions(-)
 delete mode 100644 arch/xtensa/include/asm/param.h

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index a2fdabfce43a..dff7cc39437c 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
+generic-y += param.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += rwsem.h
diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h
deleted file mode 100644
index 0a70e780ef2a..000000000000
--- a/arch/xtensa/include/asm/param.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * include/asm-xtensa/param.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-#ifndef _XTENSA_PARAM_H
-#define _XTENSA_PARAM_H
-
-#include <uapi/asm/param.h>
-
-# define HZ		CONFIG_HZ	/* internal timer frequency */
-# define USER_HZ	100		/* for user interfaces in "ticks" */
-# define CLOCKS_PER_SEC (USER_HZ)	/* frequnzy at which times() counts */
-#endif /* _XTENSA_PARAM_H */

From edf3f301db7af7e784d06f7059dfc8a69359af13 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 10 Jul 2017 18:45:41 +0300
Subject: [PATCH 016/281] IB/ipoib: Fix race between light events and interface
 restart

A potential race between light_event and interface restart
may attach multicast group to an already attached QP.

Scenario:
light_event flow goes through ipoib_mcast_dev_flush function,
if a context switch occurs before calling ipoib_mcast_remove_list,
then we may face a situation where the broadcast of the priv is null
and the corresponding QP is not detached yet.
If an "interface restart" runs during the previous context switch,
the following scenario occurs:
When the device goes up, ipoib_ib_dev_up function will be called,
it will send a new registration request to the broadcast group and then
attach the group to the QP that was not detached before.

     IPOIB_FLUSH_LIGHT                                          INTERFACE RESTART

    __ipoib_ib_dev_flush                                                |
        |                                                               |
        |                                                               |
        |                                                               |
    ipoib_mcast_dev_flush                                               |
    Move mcast list and broadcast to remove_list                        |
        |                                                               |
        |                                                               |
    Context Switch-->                                                   |
        |                                                       ipoib_ib_dev_down
        |                                                               |
        |                                                               |
        |                                                       ipoib_ib_dev_up
        |                                                               |
        |                                                               |
        |                                                       ipoib_mcast_join_task
        |                                                       allocate new broadcast
        |                                                               |
        |                                                               |
        |                                                       Attach QP to multicast group
        |                                                               |
        |                                                               |
        |                                                       <--Context Switch
    ipoib_mcast_leave
    Detach QP from multicast group

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib.h           | 1 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c      | 1 +
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ff50a7bd66d8..7ac25059c40f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,7 @@ struct ipoib_dev_priv {
 	unsigned long flags;
 
 	struct rw_semaphore vlan_rwsem;
+	struct mutex mcast_mutex;
 
 	struct rb_root  path_tree;
 	struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4ce315c92b48..144187b407bd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1877,6 +1877,7 @@ static void ipoib_build_priv(struct net_device *dev)
 	priv->dev = dev;
 	spin_lock_init(&priv->lock);
 	init_rwsem(&priv->vlan_rwsem);
+	mutex_init(&priv->mcast_mutex);
 
 	INIT_LIST_HEAD(&priv->path_list);
 	INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 057f58e6afca..0a0b2ce45cbc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -838,6 +838,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 	struct ipoib_mcast *mcast, *tmcast;
 	unsigned long flags;
 
+	mutex_lock(&priv->mcast_mutex);
 	ipoib_dbg_mcast(priv, "flushing multicast list\n");
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -865,6 +866,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 			wait_for_completion(&mcast->done);
 
 	ipoib_mcast_remove_list(&remove_list);
+	mutex_unlock(&priv->mcast_mutex);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)

From 6bdc8de2e86e717124a715ecc480892a2c331ff5 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Wed, 12 Jul 2017 10:40:25 +0300
Subject: [PATCH 017/281] IB/ipoib: Use cancel_delayed_work_sync when needed

The work mcast_task can re-queue itself, so instead of doing
cancel && flush_workqueue, that still can leave a queued task
on the air, use cancel_delayed_work_sync.

Also, no need to use lock over the cancel, the original lock was
due to bit assignment setting (IPOIB_MCAST_RUN) that is not in use
anymore.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 0a0b2ce45cbc..f80bf0f5d7cf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -684,15 +684,10 @@ void ipoib_mcast_start_thread(struct net_device *dev)
 int ipoib_mcast_stop_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	unsigned long flags;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-	spin_lock_irqsave(&priv->lock, flags);
-	cancel_delayed_work(&priv->mcast_task);
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	flush_workqueue(priv->wq);
+	cancel_delayed_work_sync(&priv->mcast_task);
 
 	return 0;
 }

From a08e1120627f72e9ed7c291e3b9f8dd29c1513ab Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Wed, 12 Jul 2017 13:11:54 +0300
Subject: [PATCH 018/281] IB/ipoib: Make sure no in-flight joins while leaving
 that mcast

While cleaning neighs and there is a send-only mcast neigh, the driver
should wait to finish its join process before trying to remove it.

Without this patch, we will see messages like: "ipoib_mcast_leave on an
in-flight join" and unexpected results in the join_complete.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 .../infiniband/ulp/ipoib/ipoib_multicast.c    | 24 +++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f80bf0f5d7cf..93e149efc1f5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -743,6 +743,14 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
 {
 	struct ipoib_mcast *mcast, *tmcast;
 
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
+	list_for_each_entry_safe(mcast, tmcast, remove_list, list)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+			wait_for_completion(&mcast->done);
+
 	list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
@@ -852,14 +860,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
-	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-			wait_for_completion(&mcast->done);
-
 	ipoib_mcast_remove_list(&remove_list);
 	mutex_unlock(&priv->mcast_mutex);
 }
@@ -979,14 +979,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	netif_addr_unlock(dev);
 	local_irq_restore(flags);
 
-	/*
-	 * make sure the in-flight joins have finished before we attempt
-	 * to leave
-	 */
-	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-			wait_for_completion(&mcast->done);
-
 	ipoib_mcast_remove_list(&remove_list);
 
 	/*

From 11f74b40359b19f760964e71d04882a6caf530cc Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 13 Jul 2017 11:27:12 +0300
Subject: [PATCH 019/281] IB/ipoib: Prevent setting negative values to
 max_nonsrq_conn_qp

Don't allow negative values to max_nonsrq_conn_qp. There is no functional
impact on a negative value but it is logicically incorrect.

Fixes: 68e995a29572 ("IPoIB/cm: Add connected mode support for devices without SRQs")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 144187b407bd..8b7ec15a9d6e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2366,6 +2366,7 @@ static int __init ipoib_init_module(void)
 	ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE);
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
+	ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0);
 #endif
 
 	/*

From d2e46fccc3e3d73a741efe433f00960331280696 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Sun, 16 Jul 2017 11:33:01 +0300
Subject: [PATCH 020/281] IB/ipoib: Set IPOIB_NEIGH_TBL_FLUSH after flushed
 completion initialization

Set IPOIB_NEIGH_TBL_FLUSH bit after initializing the neighbor
flushed completion, otherwise the garbage collector may signal
a completion while it is not initialized yet.

Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path")
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8b7ec15a9d6e..f4403c52cd67 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1560,6 +1560,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 	int i, wait_flushed = 0;
 
 	init_completion(&priv->ntbl.flushed);
+	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
 	spin_lock_irqsave(&priv->lock, flags);
 
@@ -1604,7 +1605,6 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
 	init_completion(&priv->ntbl.deleted);
-	set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
 	/* Stop GC if called at init fail need to cancel work */
 	stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);

From 4829d964dfb027558c732cfa0d13b716ab3f0838 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Mon, 10 Jul 2017 18:12:43 +0300
Subject: [PATCH 021/281] IB/ipoib: Add multicast packets statistics

Update the multicast counter when multicast packets are received and
provide this information through ethtool support.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 ++-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c      | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7871379342f4..184a22f48027 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -52,7 +52,8 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
 	IPOIB_NETDEV_STAT(tx_bytes),
 	IPOIB_NETDEV_STAT(tx_errors),
 	IPOIB_NETDEV_STAT(rx_dropped),
-	IPOIB_NETDEV_STAT(tx_dropped)
+	IPOIB_NETDEV_STAT(tx_dropped),
+	IPOIB_NETDEV_STAT(multicast),
 };
 
 #define IPOIB_GLOBAL_STATS_LEN	ARRAY_SIZE(ipoib_gstrings_stats)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 57a9655e844d..02eda1f53a67 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -256,6 +256,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	++dev->stats.rx_packets;
 	dev->stats.rx_bytes += skb->len;
+	if (skb->pkt_type == PACKET_MULTICAST)
+		dev->stats.multicast++;
 
 	skb->dev = dev;
 	if ((dev->features & NETIF_F_RXCSUM) &&

From eb54714ddcb2462d4d4b8aa78d028b61e217a835 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Sun, 2 Jul 2017 15:05:59 +0300
Subject: [PATCH 022/281] IB/ipoib: Add get statistics support to SRIOV VF

Add SRIOV VF support to get traffic statistics.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f4403c52cd67..24fa87fe0952 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1847,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
 	.ndo_tx_timeout		 = ipoib_timeout,
 	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
 	.ndo_get_iflink		 = ipoib_get_iflink,
+	.ndo_get_stats64	 = ipoib_get_stats,
 };
 
 void ipoib_setup_common(struct net_device *dev)

From dc892e17bbae670a3d7aa6ab8bd1033b15b24645 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 13 Jul 2017 13:34:19 +0300
Subject: [PATCH 023/281] IB/ipoib: Clean error paths in add port

Refactor error paths in ipoib_add_port() function. The code flow
ensures that the function terminates on every error flow and it makes
redundant all "else" cases.

The functions are called during the flow are returning "result < 0", in
case of error, so there is no need to check it explicitly.

Fixes: 58e9cc90cda7 ("IB/IPoIB: Fix bad error flow in ipoib_add_port()")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 24fa87fe0952..6c77df34869d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2175,14 +2175,14 @@ static struct net_device *ipoib_add_port(const char *format,
 	priv->dev->dev_id = port - 1;
 
 	result = ib_query_port(hca, port, &attr);
-	if (!result)
-		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
-	else {
+	if (result) {
 		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
 		       hca->name, port);
 		goto device_init_failed;
 	}
 
+	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
@@ -2213,12 +2213,14 @@ static struct net_device *ipoib_add_port(const char *format,
 		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
-	} else
-		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+	}
+
+	memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
+	       sizeof(union ib_gid));
 	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
 	result = ipoib_dev_init(priv->dev, hca, port);
-	if (result < 0) {
+	if (result) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;

From 1b355094b308f3377c8f574ce86135ee159c6285 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sat, 15 Jul 2017 16:26:55 +0300
Subject: [PATCH 024/281] IB/ipoib: Remove double pointer assigning

There is no need to assign "p" pointer twice.

This patch fixes the following smatch warning:
drivers/infiniband/ulp/ipoib/ipoib_cm.c:517 ipoib_cm_rx_handler() warn:
	missing break? reassigning 'p->id'

Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index f87d104837dc..d69410c2ed97 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -511,7 +511,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 	case IB_CM_REQ_RECEIVED:
 		return ipoib_cm_req_handler(cm_id, event);
 	case IB_CM_DREQ_RECEIVED:
-		p = cm_id->context;
 		ib_send_cm_drep(cm_id, NULL, 0);
 		/* Fall through */
 	case IB_CM_REJ_RECEIVED:

From b287b76e89503ef1d403cc5cc8bd74b035d25bfa Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 23 Jul 2017 10:46:14 +0300
Subject: [PATCH 025/281] Revert "IB/core: Allow QP state transition from reset
 to error"

The commit ebc9ca43e1d5 ("IB/core: Allow QP state transition from reset to error")
allowed transition from Reset to Error state for the QPs. This behavior
doesn't follow the IBTA specification 1.3, which in 10.3.1 QUEUE PAIR AND
EE CONTEXT STATES section.

The quote from the spec:
"An error can be forced from any state, except Reset, with
the Modify QP/EE Verb."

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/verbs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index fb98ed67d5bc..7f8fe443df46 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -895,7 +895,6 @@ static const struct {
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
-		[IB_QPS_ERR] =   { .valid = 1 },
 		[IB_QPS_INIT]  = {
 			.valid = 1,
 			.req_param = {

From 5dc78ad1904db597bdb4427f3ead437aae86f54c Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Thu, 13 Jul 2017 14:29:08 +0300
Subject: [PATCH 026/281] IB/ipoib: Notify on modify QP failure only when
 relevant

Modify QP can fail and it can be acceptable, like when moving from RST to
ERR state, all the rest are not acceptable and a message to the log
should be printed.

The current code prints on all failures and many messages like:
"Failed to modify QP to ERROR state" appear, even when supported by the
state machine of the QP object.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 02eda1f53a67..2e075377242e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -711,6 +711,27 @@ static int recvs_pending(struct net_device *dev)
 	return pending;
 }
 
+static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
+					struct ib_qp *qp,
+					enum ib_qp_state new_state)
+{
+	struct ib_qp_attr qp_attr;
+	struct ib_qp_init_attr query_init_attr;
+	int ret;
+
+	ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+	if (ret) {
+		ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
+		return;
+	}
+	/* print according to the new-state and the previous state.*/
+	if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
+		ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
+	else
+		ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
+			   new_state, qp_attr.qp_state);
+}
+
 int ipoib_ib_dev_stop_default(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -730,7 +751,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
 	 */
 	qp_attr.qp_state = IB_QPS_ERR;
 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
-		ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+		check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);
 
 	/* Wait for all sends and receives to complete */
 	begin = jiffies;

From 737cc2a593782df6846b3cab7e0f64384f58364a Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@mellanox.com>
Date: Mon, 17 Jul 2017 12:00:35 +0300
Subject: [PATCH 027/281] iio: aspeed-adc: wait for initial sequence.

This patch enables adc engine at initialization time and waits
for the initial sequence completion before enabling adc channels.

Without this code adc channels are not functional and shows
zeros for all connected channels.

Tested on mellanox msn platform.

v1 -> v2:
Pointed by Rick Altherr:
 - Wait init sequence code enabled by bool
from OF match table.

Signed-off-by: Mykola Kostenok <c_mykolak@mellanox.com>
Reviewed-by: Rick Altherr <raltherr@google.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/aspeed_adc.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c
index 62670cbfa2bb..87fd6e0ce5ee 100644
--- a/drivers/iio/adc/aspeed_adc.c
+++ b/drivers/iio/adc/aspeed_adc.c
@@ -22,6 +22,7 @@
 
 #include <linux/iio/iio.h>
 #include <linux/iio/driver.h>
+#include <linux/iopoll.h>
 
 #define ASPEED_RESOLUTION_BITS		10
 #define ASPEED_CLOCKS_PER_SAMPLE	12
@@ -38,11 +39,17 @@
 
 #define ASPEED_ENGINE_ENABLE		BIT(0)
 
+#define ASPEED_ADC_CTRL_INIT_RDY	BIT(8)
+
+#define ASPEED_ADC_INIT_POLLING_TIME	500
+#define ASPEED_ADC_INIT_TIMEOUT		500000
+
 struct aspeed_adc_model_data {
 	const char *model_name;
 	unsigned int min_sampling_rate;	// Hz
 	unsigned int max_sampling_rate;	// Hz
 	unsigned int vref_voltage;	// mV
+	bool wait_init_sequence;
 };
 
 struct aspeed_adc_data {
@@ -211,6 +218,24 @@ static int aspeed_adc_probe(struct platform_device *pdev)
 		goto scaler_error;
 	}
 
+	model_data = of_device_get_match_data(&pdev->dev);
+
+	if (model_data->wait_init_sequence) {
+		/* Enable engine in normal mode. */
+		writel(ASPEED_OPERATION_MODE_NORMAL | ASPEED_ENGINE_ENABLE,
+		       data->base + ASPEED_REG_ENGINE_CONTROL);
+
+		/* Wait for initial sequence complete. */
+		ret = readl_poll_timeout(data->base + ASPEED_REG_ENGINE_CONTROL,
+					 adc_engine_control_reg_val,
+					 adc_engine_control_reg_val &
+					 ASPEED_ADC_CTRL_INIT_RDY,
+					 ASPEED_ADC_INIT_POLLING_TIME,
+					 ASPEED_ADC_INIT_TIMEOUT);
+		if (ret)
+			goto scaler_error;
+	}
+
 	/* Start all channels in normal mode. */
 	clk_prepare_enable(data->clk_scaler->clk);
 	adc_engine_control_reg_val = GENMASK(31, 16) |
@@ -270,6 +295,7 @@ static const struct aspeed_adc_model_data ast2500_model_data = {
 	.vref_voltage = 1800, // mV
 	.min_sampling_rate = 1,
 	.max_sampling_rate = 1000000,
+	.wait_init_sequence = true,
 };
 
 static const struct of_device_id aspeed_adc_matches[] = {

From 8addebc14a322fa8ca67cd57c6038069acde8ddc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:56 +0200
Subject: [PATCH 028/281] scsi: bnx2fc: Plug CPU hotplug race

bnx2fc_process_new_cqes() has protection against CPU hotplug, which relies
on the per cpu thread pointer. This protection is racy because it happens
only partially with the per cpu fp_work_lock held.

If the CPU is unplugged after the lock is dropped, the wakeup code can
dereference a NULL pointer or access freed and potentially reused memory.

Restructure the code so the thread check and wakeup happens with the
fp_work_lock held.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_hwi.c | 45 ++++++++++++++++----------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index 913c750205ce..26de61d65a4d 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
@@ -1008,6 +1008,28 @@ static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe)
 	return work;
 }
 
+/* Pending work request completion */
+static void bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe)
+{
+	unsigned int cpu = wqe % num_possible_cpus();
+	struct bnx2fc_percpu_s *fps;
+	struct bnx2fc_work *work;
+
+	fps = &per_cpu(bnx2fc_percpu, cpu);
+	spin_lock_bh(&fps->fp_work_lock);
+	if (fps->iothread) {
+		work = bnx2fc_alloc_work(tgt, wqe);
+		if (work) {
+			list_add_tail(&work->list, &fps->work_list);
+			wake_up_process(fps->iothread);
+			spin_unlock_bh(&fps->fp_work_lock);
+			return;
+		}
+	}
+	spin_unlock_bh(&fps->fp_work_lock);
+	bnx2fc_process_cq_compl(tgt, wqe);
+}
+
 int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt)
 {
 	struct fcoe_cqe *cq;
@@ -1042,28 +1064,7 @@ int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt)
 			/* Unsolicited event notification */
 			bnx2fc_process_unsol_compl(tgt, wqe);
 		} else {
-			/* Pending work request completion */
-			struct bnx2fc_work *work = NULL;
-			struct bnx2fc_percpu_s *fps = NULL;
-			unsigned int cpu = wqe % num_possible_cpus();
-
-			fps = &per_cpu(bnx2fc_percpu, cpu);
-			spin_lock_bh(&fps->fp_work_lock);
-			if (unlikely(!fps->iothread))
-				goto unlock;
-
-			work = bnx2fc_alloc_work(tgt, wqe);
-			if (work)
-				list_add_tail(&work->list,
-					      &fps->work_list);
-unlock:
-			spin_unlock_bh(&fps->fp_work_lock);
-
-			/* Pending work request completion */
-			if (fps->iothread && work)
-				wake_up_process(fps->iothread);
-			else
-				bnx2fc_process_cq_compl(tgt, wqe);
+			bnx2fc_pending_work(tgt, wqe);
 			num_free_sqes++;
 		}
 		cqe++;

From 2c2b66ae9d11d99f5f6d7010f0d46401ed9031ce Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:57 +0200
Subject: [PATCH 029/281] scsi: bnx2fc: Prevent recursive cpuhotplug locking

The BNX2FC module init/exit code installs/removes the hotplug callbacks with
the cpu hotplug lock held. This worked with the old CPU locking
implementation which allowed recursive locking, but with the new percpu
rwsem based mechanism this is not longer allowed.

Use the _cpuslocked() variants to fix this.

Reported-by: kernel test robot <fengguang.wu@intel.com>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 7dfe709a7138..c4ebf8c5d884 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2766,15 +2766,16 @@ static int __init bnx2fc_mod_init(void)
 	for_each_online_cpu(cpu)
 		bnx2fc_percpu_thread_create(cpu);
 
-	rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-				       "scsi/bnx2fc:online",
-				       bnx2fc_cpu_online, NULL);
+	rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+						  "scsi/bnx2fc:online",
+						  bnx2fc_cpu_online, NULL);
 	if (rc < 0)
 		goto stop_threads;
 	bnx2fc_online_state = rc;
 
-	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead",
-				  NULL, bnx2fc_cpu_dead);
+	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD,
+					     "scsi/bnx2fc:dead",
+					     NULL, bnx2fc_cpu_dead);
 	put_online_cpus();
 
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
@@ -2850,8 +2851,8 @@ static void __exit bnx2fc_mod_exit(void)
 		bnx2fc_percpu_thread_destroy(cpu);
 	}
 
-	cpuhp_remove_state_nocalls(bnx2fc_online_state);
-	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD);
+	cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state);
+	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD);
 
 	put_online_cpus();
 

From 2fa2fa1ae6b42fc4c4995fdbf8fd7df96bb25ba4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:58 +0200
Subject: [PATCH 030/281] scsi: bnx2i: Prevent recursive cpuhotplug locking

The BNX2I module init/exit code installs/removes the hotplug callbacks with
the cpu hotplug lock held. This worked with the old CPU locking
implementation which allowed recursive locking, but with the new percpu
rwsem based mechanism this is not longer allowed.

Use the _cpuslocked() variants to fix this.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 86afc002814c..7487b653e799 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -516,15 +516,16 @@ static int __init bnx2i_mod_init(void)
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_create(cpu);
 
-	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-				       "scsi/bnx2i:online",
-				       bnx2i_cpu_online, NULL);
+	err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
+						   "scsi/bnx2i:online",
+						   bnx2i_cpu_online, NULL);
 	if (err < 0)
 		goto remove_threads;
 	bnx2i_online_state = err;
 
-	cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead",
-				  NULL, bnx2i_cpu_dead);
+	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD,
+					     "scsi/bnx2i:dead",
+					     NULL, bnx2i_cpu_dead);
 	put_online_cpus();
 	return 0;
 
@@ -574,8 +575,8 @@ static void __exit bnx2i_mod_exit(void)
 	for_each_online_cpu(cpu)
 		bnx2i_percpu_thread_destroy(cpu);
 
-	cpuhp_remove_state_nocalls(bnx2i_online_state);
-	cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD);
+	cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state);
+	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD);
 	put_online_cpus();
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);

From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:59 +0200
Subject: [PATCH 031/281] scsi: bnx2fc: Simplify CPU hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++------------------------
 include/linux/cpuhotplug.h        |  1 -
 2 files changed, 15 insertions(+), 55 deletions(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index c4ebf8c5d884..6844ba361616 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = {
 };
 
 /**
- * bnx2fc_percpu_thread_create - Create a receive thread for an
- *				 online CPU
+ * bnx2fc_cpu_online - Create a receive thread for an  online CPU
  *
  * @cpu: cpu index for the online cpu
  */
-static void bnx2fc_percpu_thread_create(unsigned int cpu)
+static int bnx2fc_cpu_online(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2fc_percpu_io_thread,
 					(void *)p, cpu_to_node(cpu),
 					"bnx2fc_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
+static int bnx2fc_cpu_offline(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 	thread = p->iothread;
 	p->iothread = NULL;
 
-
 	/* Free all work in the list */
 	list_for_each_entry_safe(work, tmp, &p->work_list, list) {
 		list_del_init(&work->list);
@@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 
 	if (thread)
 		kthread_stop(thread);
-}
-
-
-static int bnx2fc_cpu_online(unsigned int cpu)
-{
-	printk(PFX "CPU %x online: Create Rx thread\n", cpu);
-	bnx2fc_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2fc_cpu_dead(unsigned int cpu)
-{
-	printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2fc_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void)
 		spin_lock_init(&p->fp_work_lock);
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_create(cpu);
-
-	rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						  "scsi/bnx2fc:online",
-						  bnx2fc_cpu_online, NULL);
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online",
+			       bnx2fc_cpu_online, bnx2fc_cpu_offline);
 	if (rc < 0)
-		goto stop_threads;
+		goto stop_thread;
 	bnx2fc_online_state = rc;
 
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD,
-					     "scsi/bnx2fc:dead",
-					     NULL, bnx2fc_cpu_dead);
-	put_online_cpus();
-
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
-
 	return 0;
 
-stop_threads:
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_destroy(cpu);
-	put_online_cpus();
+stop_thread:
 	kthread_stop(l2_thread);
 free_wq:
 	destroy_workqueue(bnx2fc_wq);
@@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void)
 	struct fcoe_percpu_s *bg;
 	struct task_struct *l2_thread;
 	struct sk_buff *skb;
-	unsigned int cpu = 0;
 
 	/*
 	 * NOTE: Since cnic calls register_driver routine rtnl_lock,
@@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void)
 	if (l2_thread)
 		kthread_stop(l2_thread);
 
-	get_online_cpus();
-	/* Destroy per cpu threads */
-	for_each_online_cpu(cpu) {
-		bnx2fc_percpu_thread_destroy(cpu);
-	}
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD);
-
-	put_online_cpus();
+	cpuhp_remove_state(bnx2fc_online_state);
 
 	destroy_workqueue(bnx2fc_wq);
 	/*
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index b56573bf440d..2e7b1731ad12 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2FC_DEAD,
 	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,

From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:53:00 +0200
Subject: [PATCH 032/281] scsi: bnx2i: Simplify cpu hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++-------------------------
 include/linux/cpuhotplug.h      |  1 -
 2 files changed, 15 insertions(+), 51 deletions(-)

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 7487b653e799..4ebcda8d9500 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle)
 
 
 /**
- * bnx2i_percpu_thread_create - Create a receive thread for an
- *				online CPU
+ * bnx2i_cpu_online - Create a receive thread for an online CPU
  *
  * @cpu:	cpu index for the online cpu
  */
-static void bnx2i_percpu_thread_create(unsigned int cpu)
+static int bnx2i_cpu_online(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p,
 					cpu_to_node(cpu),
 					"bnx2i_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-
-static void bnx2i_percpu_thread_destroy(unsigned int cpu)
+static int bnx2i_cpu_offline(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu)
 	spin_unlock_bh(&p->p_work_lock);
 	if (thread)
 		kthread_stop(thread);
-}
-
-static int bnx2i_cpu_online(unsigned int cpu)
-{
-	pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu);
-	bnx2i_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2i_cpu_dead(unsigned int cpu)
-{
-	pr_info("CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2i_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void)
 		p->iothread = NULL;
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_create(cpu);
-
-	err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						   "scsi/bnx2i:online",
-						   bnx2i_cpu_online, NULL);
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online",
+				bnx2i_cpu_online, bnx2i_cpu_offline);
 	if (err < 0)
-		goto remove_threads;
+		goto unreg_driver;
 	bnx2i_online_state = err;
-
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD,
-					     "scsi/bnx2i:dead",
-					     NULL, bnx2i_cpu_dead);
-	put_online_cpus();
 	return 0;
 
-remove_threads:
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-	put_online_cpus();
+unreg_driver:
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
 unreg_xport:
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
@@ -552,7 +525,6 @@ out:
 static void __exit bnx2i_mod_exit(void)
 {
 	struct bnx2i_hba *hba;
-	unsigned cpu = 0;
 
 	mutex_lock(&bnx2i_dev_lock);
 	while (!list_empty(&adapter_list)) {
@@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void)
 	}
 	mutex_unlock(&bnx2i_dev_lock);
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD);
-	put_online_cpus();
+	cpuhp_remove_state(bnx2i_online_state);
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2e7b1731ad12..82b30e638430 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,

From 722477c4f2c50bc8d800d293a3bc5aa843f16e8d Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Date: Tue, 25 Jul 2017 11:19:21 +0200
Subject: [PATCH 033/281] scsi: qedf: Limit number of CQs

FCOE offloading failed with:

[qed_sp_fcoe_func_start:150(sp-0-3b:00.02)]Cannot satisfy CQ amount. CQs
		 requested 8, CQs available 6. Aborting function start
[qed_fcoe_start:821()]Failed to start fcoe
[__qedf_probe:3041]:6: Cannot start FCoE function.

The reason is a newly introduced check in the qed main part. This change
also provides the information about how many CQs are available, so we
simply limit the number of requested CQs..

Fixes: 3c5da9427802 ("qed: Share additional information with qedf")
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf.h      |  3 ++-
 drivers/scsi/qedf/qedf_main.c | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 4d038926a455..351f06dfc5a0 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -528,7 +528,8 @@ struct fip_vlan {
 #define QEDF_WRITE                    (1 << 0)
 #define MAX_FIBRE_LUNS			0xffffffff
 
-#define QEDF_MAX_NUM_CQS		8
+#define MIN_NUM_CPUS_MSIX(x)	min_t(u32, x->dev_info.num_cqs, \
+					num_online_cpus())
 
 /*
  * PCI function probe defines
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 7786c97e033f..1d13c9ca517d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2760,11 +2760,9 @@ static int qedf_set_fcoe_pf_param(struct qedf_ctx *qedf)
 	 * we allocation is the minimum off:
 	 *
 	 * Number of CPUs
-	 * Number of MSI-X vectors
-	 * Max number allocated in hardware (QEDF_MAX_NUM_CQS)
+	 * Number allocated by qed for our PCI function
 	 */
-	qedf->num_queues = min((unsigned int)QEDF_MAX_NUM_CQS,
-	    num_online_cpus());
+	qedf->num_queues = MIN_NUM_CPUS_MSIX(qedf);
 
 	QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of CQs is %d.\n",
 		   qedf->num_queues);
@@ -2962,6 +2960,13 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		goto err1;
 	}
 
+	/* Learn information crucial for qedf to progress */
+	rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info);
+	if (rc) {
+		QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n");
+		goto err1;
+	}
+
 	/* queue allocation code should come here
 	 * order should be
 	 * 	slowpath_start
@@ -2977,13 +2982,6 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 	}
 	qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params);
 
-	/* Learn information crucial for qedf to progress */
-	rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info);
-	if (rc) {
-		QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n");
-		goto err1;
-	}
-
 	/* Record BDQ producer doorbell addresses */
 	qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr;
 	qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr;

From e6fd916a625110d75bd4d15b8488df44cf41c9fc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 25 Jul 2017 22:49:55 +0300
Subject: [PATCH 034/281] scsi: aacraid: reading out of bounds

"qd.id" comes directly from the copy_from_user() on the line before so
we should verify that it's within bounds.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aachba.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 707ee2f5954d..4591113c49de 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -3198,10 +3198,11 @@ static int query_disk(struct aac_dev *dev, void __user *arg)
 		return -EBUSY;
 	if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
 		return -EFAULT;
-	if (qd.cnum == -1)
+	if (qd.cnum == -1) {
+		if (qd.id < 0 || qd.id >= dev->maximum_num_containers)
+			return -EINVAL;
 		qd.cnum = qd.id;
-	else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1))
-	{
+	} else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1)) {
 		if (qd.cnum < 0 || qd.cnum >= dev->maximum_num_containers)
 			return -EINVAL;
 		qd.instance = dev->scsi_host_ptr->host_no;

From f930c7043663188429cd9b254e9d761edfc101ce Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Thu, 27 Jul 2017 09:11:26 +0200
Subject: [PATCH 035/281] scsi: sg: only check for dxfer_len greater than 256M

Don't make any assumptions on the sg_io_hdr_t::dxfer_direction or the
sg_io_hdr_t::dxferp in order to determine if it is a valid request. The
only way we can check for bad requests is by checking if the length
exceeds 256M.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Fixes: 28676d869bbb (scsi: sg: check for valid direction before starting the
request)
Reported-by: Jason L Tibbitts III <tibbs@math.uh.edu>
Tested-by: Jason L Tibbitts III <tibbs@math.uh.edu>
Suggested-by: Doug Gilbert <dgilbert@interlog.com>
Cc: Doug Gilbert <dgilbert@interlog.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 31 +------------------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 4fe606b000b4..d7ff71e0c85c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -751,35 +751,6 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
 	return count;
 }
 
-static bool sg_is_valid_dxfer(sg_io_hdr_t *hp)
-{
-	switch (hp->dxfer_direction) {
-	case SG_DXFER_NONE:
-		if (hp->dxferp || hp->dxfer_len > 0)
-			return false;
-		return true;
-	case SG_DXFER_FROM_DEV:
-		/*
-		 * for SG_DXFER_FROM_DEV we always set dxfer_len to > 0. dxferp
-		 * can either be NULL or != NULL so there's no point in checking
-		 * it either. So just return true.
-		 */
-		return true;
-	case SG_DXFER_TO_DEV:
-	case SG_DXFER_TO_FROM_DEV:
-		if (!hp->dxferp || hp->dxfer_len == 0)
-			return false;
-		return true;
-	case SG_DXFER_UNKNOWN:
-		if ((!hp->dxferp && hp->dxfer_len) ||
-		    (hp->dxferp && hp->dxfer_len == 0))
-			return false;
-		return true;
-	default:
-		return false;
-	}
-}
-
 static int
 sg_common_write(Sg_fd * sfp, Sg_request * srp,
 		unsigned char *cmnd, int timeout, int blocking)
@@ -800,7 +771,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 			"sg_common_write:  scsi opcode=0x%02x, cmd_size=%d\n",
 			(int) cmnd[0], (int) hp->cmd_len));
 
-	if (!sg_is_valid_dxfer(hp))
+	if (hp->dxfer_len >= SZ_256M)
 		return -EINVAL;
 
 	k = sg_start_req(srp, cmnd);

From d630213f2a47933e1037909273a20023ba3e598d Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 10 Jul 2017 14:41:25 +0200
Subject: [PATCH 036/281] drm/bridge: tc358767: fix probe without attached
 output node

The output node of the TC358767 is only used if another bridge is chained
behind it. Panels attached to the TC358767 can be detected using the usual
DP AUX probing. This restores the old behavior of ignoring the output if
no endpoint is found.

Fixes: ebc944613567 (drm: convert drivers to use drm_of_find_panel_or_bridge)
CC: stable@vger.kernel.org
Acked-by: Andrey Gusakov <andrey.gusakov@cogentembedded.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20170710124125.9019-1-l.stach@pengutronix.de
---
 drivers/gpu/drm/bridge/tc358767.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 5c26488e7a2d..0529e500c534 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1255,7 +1255,7 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	/* port@2 is the output port */
 	ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &tc->panel, NULL);
-	if (ret)
+	if (ret && ret != -ENODEV)
 		return ret;
 
 	/* Shut down GPIO is optional */

From 6d0f581d1768d3eaba15776e7dd1fdfec10cfe36 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Fri, 28 Jul 2017 17:42:59 -0700
Subject: [PATCH 037/281] xtensa: fix cache aliasing handling code for WT cache

Currently building kernel for xtensa core with aliasing WT cache fails
with the following messages:

  mm/memory.c:2152: undefined reference to `flush_dcache_page'
  mm/memory.c:2332: undefined reference to `local_flush_cache_page'
  mm/memory.c:1919: undefined reference to `local_flush_cache_range'
  mm/memory.c:4179: undefined reference to `copy_to_user_page'
  mm/memory.c:4183: undefined reference to `copy_from_user_page'

This happens because implementation of these functions is only compiled
when data cache is WB, which looks wrong: even when data cache doesn't
need flushing it still needs invalidation. The functions like
__flush_[invalidate_]dcache_* are correctly defined for both WB and WT
caches (and even if they weren't that'd still be ok, just slower).

Fix this by providing the same implementation of the above functions for
both WB and WT cache.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/mm/cache.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 1a804a2f9a5b..dbb1cdef3663 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -120,10 +120,6 @@ void copy_user_highpage(struct page *dst, struct page *src,
 	preempt_enable();
 }
 
-#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
-
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
-
 /*
  * Any time the kernel writes to a user page cache page, or it is about to
  * read from a page cache page this routine is called.
@@ -208,7 +204,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
 	__invalidate_icache_page_alias(virt, phys);
 }
 
-#endif
+#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
 
 void
 update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
@@ -225,7 +221,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
 
 	flush_tlb_page(vma, addr);
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 	if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
 		unsigned long phys = page_to_phys(page);
@@ -256,7 +252,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
  * flush_dcache_page() on the page.
  */
 
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
 
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		unsigned long vaddr, void *dst, const void *src,

From 6ab1d8da972d4c4e318607e96c5ecb32101c80f4 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Fri, 28 Jul 2017 21:41:18 +0200
Subject: [PATCH 038/281] block, bfq: reset in_service_entity if it becomes
 idle

BFQ implements hierarchical scheduling by representing each group of
queues with a generic parent entity. For each parent entity, BFQ
maintains an in_service_entity pointer: if one of the child entities
happens to be in service, in_service_entity points to it.  The
resetting of these pointers happens only on queue expirations: when
the in-service queue is expired, i.e., stops to be the queue in
service, BFQ resets all in_service_entity pointers along the
parent-entity path from this queue to the root entity.

Functions handling the scheduling of entities assume, naturally, that
in-service entities are active, i.e., have pending I/O requests (or,
as a special case, even if they have no pending requests, they are
expected to receive a new request very soon, with the scheduler idling
the storage device while waiting for such an event). Unfortunately,
the above resetting scheme of the in_service_entity pointers may cause
this assumption to be violated.  For example, the in-service queue may
happen to remain without requests because of a request merge. In this
case the queue does become idle, and all related data structures are
updated accordingly. But in_service_entity still points to the queue
in the parent entity. This inconsistency may even propagate to
higher-level parent entities, if they happen to become idle as well,
as a consequence of the leaf queue becoming idle. For this queue and
parent entities, scheduling functions have an undefined behaviour,
and, as reported, may easily lead to kernel crashes or hangs.

This commit addresses this issue by simply resetting the
in_service_entity field also when it is detected to point to an entity
becoming idle (regardless of why the entity becomes idle).

Reported-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Laurentiu Nicola <lnicola@dend.ro>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-wf2q.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 979f8f21b7e2..881bbe5e1827 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1158,8 +1158,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 	st = bfq_entity_service_tree(entity);
 	is_in_service = entity == sd->in_service_entity;
 
-	if (is_in_service)
+	if (is_in_service) {
 		bfq_calc_finish(entity, entity->service);
+		sd->in_service_entity = NULL;
+	}
 
 	if (entity->tree == &st->active)
 		bfq_active_extract(st, entity);

From 46d556e6aaa0ec4dc83648ab1ca3d01dd2fa3ea3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 29 Jul 2017 12:42:56 +0200
Subject: [PATCH 039/281] block, bfq: consider also in_service_entity to state
 whether an entity is active

Groups of BFQ queues are represented by generic entities in BFQ. When
a queue belonging to a parent entity is deactivated, the parent entity
may need to be deactivated too, in case the deactivated queue was the
only active queue for the parent entity. This deactivation may need to
be propagated upwards if the entity belongs, in its turn, to a further
higher-level entity, and so on. In particular, the upward propagation
of deactivation stops at the first parent entity that remains active
even if one of its child entities has been deactivated.

To decide whether the last non-deactivation condition holds for a
parent entity, BFQ checks whether the field next_in_service is still
not NULL for the parent entity, after the deactivation of one of its
child entity. If it is not NULL, then there are certainly other active
entities in the parent entity, and deactivations can stop.

Unfortunately, this check misses a corner case: if in_service_entity
is not NULL, then next_in_service may happen to be NULL, although the
parent entity is evidently active. This happens if: 1) the entity
pointed by in_service_entity is the only active entity in the parent
entity, and 2) according to the definition of next_in_service, the
in_service_entity cannot be considered as next_in_service. See the
comments on the definition of next_in_service for details on this
second point.

Hitting the above corner case causes crashes.

To address this issue, this commit:
1) Extends the above check on only next_in_service to controlling both
next_in_service and in_service_entity (if any of them is not NULL,
then no further deactivation is performed)
2) Improves the (important) comments on how next_in_service is defined
and updated; in particular it fixes a few rather obscure paragraphs

Reported-by: Eric Wheeler <bfq-sched@lists.ewheeler.net>
Reported-by: Rick Yiu <rick_yiu@htc.com>
Reported-by: Tom X Nguyen <tom81094@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Tested-by: Eric Wheeler <bfq-sched@lists.ewheeler.net>
Tested-by: Rick Yiu <rick_yiu@htc.com>
Tested-by: Laurentiu Nicola <lnicola@dend.ro>
Tested-by: Tom X Nguyen <tom81094@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.h |  22 +++++--
 block/bfq-wf2q.c    | 142 ++++++++++++++++++++++++--------------------
 2 files changed, 95 insertions(+), 69 deletions(-)

diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 63e771ab56d8..859f0a8c97c8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
  *
  * bfq_sched_data is the basic scheduler queue.  It supports three
  * ioprio_classes, and can be used either as a toplevel queue or as an
- * intermediate queue on a hierarchical setup.  @next_in_service
- * points to the active entity of the sched_data service trees that
- * will be scheduled next. It is used to reduce the number of steps
- * needed for each hierarchical-schedule update.
+ * intermediate queue in a hierarchical setup.
  *
  * The supported ioprio_classes are the same as in CFQ, in descending
  * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
  * Requests from higher priority queues are served before all the
  * requests from lower priority queues; among requests of the same
  * queue requests are served according to B-WF2Q+.
- * All the fields are protected by the queue lock of the containing bfqd.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no changes in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed by
+ * in_service_entity is not a queue, then the in-service child entity
+ * of the entity pointed by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entitities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All fields are protected by the lock of the containing bfqd.
  */
 struct bfq_sched_data {
 	/* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 881bbe5e1827..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
 
 /*
  * This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
  *
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to the that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
  *
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
  */
 static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 {
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
 
 	bfqg = container_of(entity, struct bfq_group, entity);
 
+	/*
+	 * The field active_entities does not always contain the
+	 * actual number of active children entities: it happens to
+	 * not account for the in-service entity in case the latter is
+	 * removed from its active tree (which may get done after
+	 * invoking the function bfq_no_longer_next_in_service in
+	 * bfq_get_next_queue). Fortunately, here, i.e., while
+	 * bfq_no_longer_next_in_service is not yet completed in
+	 * bfq_get_next_queue, bfq_active_extract has not yet been
+	 * invoked, and thus active_entities still coincides with the
+	 * actual number of active entities.
+	 */
 	if (bfqg->active_entities == 1)
 		return true;
 
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
  * one of its children receives a new request.
  *
  * Basically, this function updates the timestamps of entity and
- * inserts entity into its active tree, ater possible extracting it
+ * inserts entity into its active tree, ater possibly extracting it
  * from its idle tree.
  */
 static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
 		entity->start = entity->finish;
 		/*
 		 * In addition, if the entity had more than one child
-		 * when set in service, then was not extracted from
+		 * when set in service, then it was not extracted from
 		 * the active tree. This implies that the position of
 		 * the entity in the active tree may need to be
 		 * changed now, because we have just updated the start
@@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
 		 * time in a moment (the requeueing is then, more
 		 * precisely, a repositioning in this case). To
 		 * implement this repositioning, we: 1) dequeue the
-		 * entity here, 2) update the finish time and
-		 * requeue the entity according to the new
-		 * timestamps below.
+		 * entity here, 2) update the finish time and requeue
+		 * the entity according to the new timestamps below.
 		 */
 		if (entity->tree)
 			bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
 
 
 /**
- * bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
- *			 and activate, requeue or reposition all ancestors
- *			 for which such an update becomes necessary.
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a
+ *				 bfq_queue, and activate, requeue or reposition
+ *				 all ancestors for which such an update becomes
+ *				 necessary.
  * @entity: the entity to activate.
  * @non_blocking_wait_rq: true if this entity was waiting for a request
  * @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
  * @ins_into_idle_tree: if false, the entity will not be put into the
  *			idle tree.
  *
- * Deactivates an entity, independently from its previous state.  Must
+ * Deactivates an entity, independently of its previous state.  Must
  * be invoked only if entity is on a service tree. Extracts the entity
- * from that tree, and if necessary and allowed, puts it on the idle
+ * from that tree, and if necessary and allowed, puts it into the idle
  * tree.
  */
 bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1179,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
 /**
  * bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
  * @entity: the entity to deactivate.
- * @ins_into_idle_tree: true if the entity can be put on the idle tree
+ * @ins_into_idle_tree: true if the entity can be put into the idle tree
  */
 static void bfq_deactivate_entity(struct bfq_entity *entity,
 				  bool ins_into_idle_tree,
@@ -1210,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
 			 */
 			bfq_update_next_in_service(sd, NULL);
 
-		if (sd->next_in_service)
+		if (sd->next_in_service || sd->in_service_entity) {
 			/*
-			 * The parent entity is still backlogged,
-			 * because next_in_service is not NULL. So, no
-			 * further upwards deactivation must be
-			 * performed.  Yet, next_in_service has
-			 * changed.  Then the schedule does need to be
-			 * updated upwards.
+			 * The parent entity is still active, because
+			 * either next_in_service or in_service_entity
+			 * is not NULL. So, no further upwards
+			 * deactivation must be performed.  Yet,
+			 * next_in_service has changed.	Then the
+			 * schedule does need to be updated upwards.
+			 *
+			 * NOTE If in_service_entity is not NULL, then
+			 * next_in_service may happen to be NULL,
+			 * although the parent entity is evidently
+			 * active. This happens if 1) the entity
+			 * pointed by in_service_entity is the only
+			 * active entity in the parent entity, and 2)
+			 * according to the definition of
+			 * next_in_service, the in_service_entity
+			 * cannot be considered as
+			 * next_in_service. See the comments on the
+			 * definition of next_in_service for details.
 			 */
 			break;
+		}
 
 		/*
 		 * If we get here, then the parent is no more
@@ -1496,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 
 		/*
 		 * If entity is no longer a candidate for next
-		 * service, then we extract it from its active tree,
-		 * for the following reason. To further boost the
-		 * throughput in some special case, BFQ needs to know
-		 * which is the next candidate entity to serve, while
-		 * there is already an entity in service. In this
-		 * respect, to make it easy to compute/update the next
-		 * candidate entity to serve after the current
-		 * candidate has been set in service, there is a case
-		 * where it is necessary to extract the current
-		 * candidate from its service tree. Such a case is
-		 * when the entity just set in service cannot be also
-		 * a candidate for next service. Details about when
-		 * this conditions holds are reported in the comments
-		 * on the function bfq_no_longer_next_in_service()
-		 * invoked below.
+		 * service, then it must be extracted from its active
+		 * tree, so as to make sure that it won't be
+		 * considered when computing next_in_service. See the
+		 * comments on the function
+		 * bfq_no_longer_next_in_service() for details.
 		 */
 		if (bfq_no_longer_next_in_service(entity))
 			bfq_active_extract(bfq_entity_service_tree(entity),
 					   entity);
 
 		/*
-		 * For the same reason why we may have just extracted
-		 * entity from its active tree, we may need to update
-		 * next_in_service for the sched_data of entity too,
-		 * regardless of whether entity has been extracted.
-		 * In fact, even if entity has not been extracted, a
-		 * descendant entity may get extracted. Such an event
-		 * would cause a change in next_in_service for the
-		 * level of the descendant entity, and thus possibly
-		 * back to upper levels.
+		 * Even if entity is not to be extracted according to
+		 * the above check, a descendant entity may get
+		 * extracted in one of the next iterations of this
+		 * loop. Such an event could cause a change in
+		 * next_in_service for the level of the descendant
+		 * entity, and thus possibly back to this level.
 		 *
-		 * We cannot perform the resulting needed update
-		 * before the end of this loop, because, to know which
-		 * is the correct next-to-serve candidate entity for
-		 * each level, we need first to find the leaf entity
-		 * to set in service. In fact, only after we know
-		 * which is the next-to-serve leaf entity, we can
-		 * discover whether the parent entity of the leaf
-		 * entity becomes the next-to-serve, and so on.
+		 * However, we cannot perform the resulting needed
+		 * update of next_in_service for this level before the
+		 * end of the whole loop, because, to know which is
+		 * the correct next-to-serve candidate entity for each
+		 * level, we need first to find the leaf entity to set
+		 * in service. In fact, only after we know which is
+		 * the next-to-serve leaf entity, we can discover
+		 * whether the parent entity of the leaf entity
+		 * becomes the next-to-serve, and so on.
 		 */
-
 	}
 
 	bfqq = bfq_entity_to_bfqq(entity);

From cd5a6a4fdaba150089af2afc220eae0fef74878a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 25 Jul 2017 23:58:50 +0200
Subject: [PATCH 040/281] USB: hcd: Mark secondary HCD as dead if the primary
 one died

Make usb_hc_died() clear the HCD_FLAG_RH_RUNNING flag for the shared
HCD and set HCD_FLAG_DEAD for it, in analogy with what is done for
the primary one.

Among other thigs, this prevents check_root_hub_suspended() from
returning -EBUSY for dead HCDs which helps to work around system
suspend issues in some situations.

This actually fixes occasional suspend failures on one of my test
machines.

Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index ab1bb3b538ac..e518f6438877 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2485,6 +2485,8 @@ void usb_hc_died (struct usb_hcd *hcd)
 	}
 	if (usb_hcd_is_primary_hcd(hcd) && hcd->shared_hcd) {
 		hcd = hcd->shared_hcd;
+		clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags);
+		set_bit(HCD_FLAG_DEAD, &hcd->flags);
 		if (hcd->rh_registered) {
 			clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 

From 89f23d51defcb94a5026d4b5da13faf4e1150a6f Mon Sep 17 00:00:00 2001
From: Alan Swanson <reiver@improbability.net>
Date: Wed, 26 Jul 2017 12:03:33 +0100
Subject: [PATCH 041/281] uas: Add US_FL_IGNORE_RESIDUE for Initio Corporation
 INIC-3069

Similar to commit d595259fbb7a ("usb-storage: Add ignore-residue quirk for
Initio INIC-3619") for INIC-3169 in unusual_devs.h but INIC-3069 already
present in unusual_uas.h. Both in same controller IC family.

Issue is that MakeMKV fails during key exchange with installed bluray drive
with following error:

002004:0000 Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED'
occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:0'

Signed-off-by: Alan Swanson <reiver@improbability.net>
Acked-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_uas.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index cbea9f329e71..cde115359793 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -124,9 +124,9 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999,
 /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */
 UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999,
 		"Initio Corporation",
-		"",
+		"INIC-3069",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
-		US_FL_NO_ATA_1X),
+		US_FL_NO_ATA_1X | US_FL_IGNORE_RESIDUE),
 
 /* Reported-by: Tom Arild Naess <tanaess@gmail.com> */
 UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999,

From 8b52291a0743fc4db4a7495c846a6f31ee84d282 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 26 Jul 2017 11:49:19 -0400
Subject: [PATCH 042/281] usb-storage: fix deadlock involving host lock and
 scsi_done

Christoph Hellwig says that since version 4.12, the kernel switched to
using blk-mq by default.  The old code used a softirq for handling
request completions, but blk-mq can handle completions in the caller's
context.  This may cause a problem for usb-storage, because it invokes
the ->scsi_done callback while holding the host lock, and the
completion routine sometimes tries to acquire the same lock (when
running the error handler, for example).

The consequence is that the existing code will sometimes deadlock upon
error completion of a SCSI command (with a lockdep warning).

This is easy enough to fix, since usb-storage doesn't really need to
hold the host lock while the callback runs.  It was simpler to write
it that way, but moving the call outside the locked region is pretty
easy and there's no downside.  That's what this patch does.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Arthur Marsh <arthur.marsh@internode.on.net>
CC: Christoph Hellwig <hch@lst.de>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/usb.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 06615934fed1..0dceb9fa3a06 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -315,6 +315,7 @@ static int usb_stor_control_thread(void * __us)
 {
 	struct us_data *us = (struct us_data *)__us;
 	struct Scsi_Host *host = us_to_host(us);
+	struct scsi_cmnd *srb;
 
 	for (;;) {
 		usb_stor_dbg(us, "*** thread sleeping\n");
@@ -330,6 +331,7 @@ static int usb_stor_control_thread(void * __us)
 		scsi_lock(host);
 
 		/* When we are called with no command pending, we're done */
+		srb = us->srb;
 		if (us->srb == NULL) {
 			scsi_unlock(host);
 			mutex_unlock(&us->dev_mutex);
@@ -398,14 +400,11 @@ static int usb_stor_control_thread(void * __us)
 		/* lock access to the state */
 		scsi_lock(host);
 
-		/* indicate that the command is done */
-		if (us->srb->result != DID_ABORT << 16) {
-			usb_stor_dbg(us, "scsi cmd done, result=0x%x\n",
-				     us->srb->result);
-			us->srb->scsi_done(us->srb);
-		} else {
+		/* was the command aborted? */
+		if (us->srb->result == DID_ABORT << 16) {
 SkipForAbort:
 			usb_stor_dbg(us, "scsi command aborted\n");
+			srb = NULL;	/* Don't call srb->scsi_done() */
 		}
 
 		/*
@@ -429,6 +428,13 @@ SkipForAbort:
 
 		/* unlock the device pointers */
 		mutex_unlock(&us->dev_mutex);
+
+		/* now that the locks are released, notify the SCSI core */
+		if (srb) {
+			usb_stor_dbg(us, "scsi cmd done, result=0x%x\n",
+					srb->result);
+			srb->scsi_done(srb);
+		}
 	} /* for (;;) */
 
 	/* Wait until we are told to stop */

From 2eac13624364db5b5e1666ae0bb3a4d36bc56b6e Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Tue, 25 Jul 2017 09:31:33 -0500
Subject: [PATCH 043/281] usb: core: unlink urbs from the tail of the
 endpoint's urb_list

While unlink an urb, if the urb has been programmed in the controller,
the controller driver might do some hw related actions to tear down the
urb.

Currently usb_hcd_flush_endpoint() passes each urb from the head of the
endpoint's urb_list to the controller driver, which could make the
controller driver think each urb has been programmed and take the
unnecessary actions for each urb.

This patch changes the behavior in usb_hcd_flush_endpoint() to pass the
urbs from the tail of the list, to avoid any unnecessary actions in an
controller driver.

Cc: stable@vger.kernel.org # v4.4+
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index e518f6438877..7f277b092b5b 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1888,7 +1888,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev,
 	/* No more submits can occur */
 	spin_lock_irq(&hcd_urb_list_lock);
 rescan:
-	list_for_each_entry (urb, &ep->urb_list, urb_list) {
+	list_for_each_entry_reverse(urb, &ep->urb_list, urb_list) {
 		int	is_in;
 
 		if (urb->unlinked)

From 45d73860530a14c608f410b91c6c341777bfa85d Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Tue, 25 Jul 2017 09:31:34 -0500
Subject: [PATCH 044/281] usb: musb: fix tx fifo flush handling again

commit 68fe05e2a451 ("usb: musb: fix tx fifo flush handling") drops the
1ms delay trying to solve the long disconnect time issue when
application queued many tx urbs. However, the 1ms delay is needed for
some use cases, for example, without the delay, reconnecting AR9271 WIFI
dongle no longer works if the connection is dropped from the AP.

So let's add back the 1ms delay in musb_h_tx_flush_fifo(), and solve the
long disconnect time problem with a separate patch for
usb_hcd_flush_endpoint().

Cc: stable@vger.kernel.org # v4.4+
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_host.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 76decb8011eb..3344ffd5bb13 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -139,6 +139,7 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep)
 				"Could not flush host TX%d fifo: csr: %04x\n",
 				ep->epnum, csr))
 			return;
+		mdelay(1);
 	}
 }
 

From 37ef38f3f83891a2f413fb872bae7d0f9bb95b27 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Thu, 27 Jul 2017 16:15:52 -0500
Subject: [PATCH 045/281] tty: pl011: fix initialization order of QDF2400 E44

The work-around for Qualcomm Technologies QDF2400 Erratum 44 hinges on a
global variable defined in the pl011 driver.  The ACPI SPCR parsing code
determines whether the work-around is needed, and if so, it changes the
console name from "pl011" to "qdf2400_e44".  The expectation is that
the pl011 driver will implement the work-around when it sees the console
name.  The global variable qdf2400_e44_present is set when that happens.

The problem is that work-around needs to be enabled when the pl011
driver probes, not when the console name is queried.  However, sbsa_probe()
is called before pl011_console_match().  The work-around appeared to work
previously because the default console on QDF2400 platforms was always
ttyAMA1.  The first time sbsa_probe() is called (for ttyAMA0),
qdf2400_e44_present is still false.  Then pl011_console_match() is called,
and it sets qdf2400_e44_present to true.  All subsequent calls to
sbsa_probe() enable the work-around.

The solution is to move the global variable into spcr.c and let the
pl011 driver query it during probe time.  This works because all QDF2400
platforms require SPCR, so parse_spcr() will always be called.
pl011_console_match still checks for the "qdf2400_e44" console name,
but it doesn't do anything else special.

Fixes: 5a0722b898f8 ("tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44")
Tested-by: Jeffrey Hugo <jhugo@codeaurora.org>
Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/spcr.c             | 36 ++++++++++++++++++++++++++++++--
 drivers/tty/serial/amba-pl011.c | 37 +++++++++++++++++----------------
 include/linux/acpi.h            |  1 +
 3 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 4ac3e06b41d8..98aa8c808a33 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -16,6 +16,16 @@
 #include <linux/kernel.h>
 #include <linux/serial_core.h>
 
+/*
+ * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as
+ * occasionally getting stuck as 1. To avoid the potential for a hang, check
+ * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART
+ * implementations, so only do so if an affected platform is detected in
+ * parse_spcr().
+ */
+bool qdf2400_e44_present;
+EXPORT_SYMBOL(qdf2400_e44_present);
+
 /*
  * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit.
  * Detect them by examining the OEM fields in the SPCR header, similiar to PCI
@@ -147,8 +157,30 @@ int __init parse_spcr(bool earlycon)
 		goto done;
 	}
 
-	if (qdf2400_erratum_44_present(&table->header))
-		uart = "qdf2400_e44";
+	/*
+	 * If the E44 erratum is required, then we need to tell the pl011
+	 * driver to implement the work-around.
+	 *
+	 * The global variable is used by the probe function when it
+	 * creates the UARTs, whether or not they're used as a console.
+	 *
+	 * If the user specifies "traditional" earlycon, the qdf2400_e44
+	 * console name matches the EARLYCON_DECLARE() statement, and
+	 * SPCR is not used.  Parameter "earlycon" is false.
+	 *
+	 * If the user specifies "SPCR" earlycon, then we need to update
+	 * the console name so that it also says "qdf2400_e44".  Parameter
+	 * "earlycon" is true.
+	 *
+	 * For consistency, if we change the console name, then we do it
+	 * for everyone, not just earlycon.
+	 */
+	if (qdf2400_erratum_44_present(&table->header)) {
+		qdf2400_e44_present = true;
+		if (earlycon)
+			uart = "qdf2400_e44";
+	}
+
 	if (xgene_8250_erratum_present(table))
 		iotype = "mmio32";
 
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8a857bb34fbb..1888d168a41c 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -142,15 +142,7 @@ static struct vendor_data vendor_sbsa = {
 	.fixed_options		= true,
 };
 
-/*
- * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as
- * occasionally getting stuck as 1. To avoid the potential for a hang, check
- * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART
- * implementations, so only do so if an affected platform is detected in
- * parse_spcr().
- */
-static bool qdf2400_e44_present = false;
-
+#ifdef CONFIG_ACPI_SPCR_TABLE
 static struct vendor_data vendor_qdt_qdf2400_e44 = {
 	.reg_offset		= pl011_std_offsets,
 	.fr_busy		= UART011_FR_TXFE,
@@ -165,6 +157,7 @@ static struct vendor_data vendor_qdt_qdf2400_e44 = {
 	.always_enabled		= true,
 	.fixed_options		= true,
 };
+#endif
 
 static u16 pl011_st_offsets[REG_ARRAY_SIZE] = {
 	[REG_DR] = UART01x_DR,
@@ -2375,12 +2368,14 @@ static int __init pl011_console_match(struct console *co, char *name, int idx,
 	resource_size_t addr;
 	int i;
 
-	if (strcmp(name, "qdf2400_e44") == 0) {
-		pr_info_once("UART: Working around QDF2400 SoC erratum 44");
-		qdf2400_e44_present = true;
-	} else if (strcmp(name, "pl011") != 0) {
+	/*
+	 * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum
+	 * have a distinct console name, so make sure we check for that.
+	 * The actual implementation of the erratum occurs in the probe
+	 * function.
+	 */
+	if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0))
 		return -ENODEV;
-	}
 
 	if (uart_parse_earlycon(options, &iotype, &addr, &options))
 		return -ENODEV;
@@ -2734,11 +2729,17 @@ static int sbsa_uart_probe(struct platform_device *pdev)
 	}
 	uap->port.irq	= ret;
 
-	uap->reg_offset	= vendor_sbsa.reg_offset;
-	uap->vendor	= qdf2400_e44_present ?
-					&vendor_qdt_qdf2400_e44 : &vendor_sbsa;
+#ifdef CONFIG_ACPI_SPCR_TABLE
+	if (qdf2400_e44_present) {
+		dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n");
+		uap->vendor = &vendor_qdt_qdf2400_e44;
+	} else
+#endif
+		uap->vendor = &vendor_sbsa;
+
+	uap->reg_offset	= uap->vendor->reg_offset;
 	uap->fifosize	= 32;
-	uap->port.iotype = vendor_sbsa.access_32b ? UPIO_MEM32 : UPIO_MEM;
+	uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM;
 	uap->port.ops	= &sbsa_uart_pops;
 	uap->fixed_baud = baudrate;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c749eef1daa1..27b4b6615263 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1209,6 +1209,7 @@ static inline bool acpi_has_watchdog(void) { return false; }
 #endif
 
 #ifdef CONFIG_ACPI_SPCR_TABLE
+extern bool qdf2400_e44_present;
 int parse_spcr(bool earlycon);
 #else
 static inline int parse_spcr(bool earlycon) { return 0; }

From cef988642cdac44e910a27cb6e8166c96f86a0df Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Fri, 28 Jul 2017 16:22:31 +0100
Subject: [PATCH 046/281] staging: comedi: comedi_fops: do not call blocking
 ops when !TASK_RUNNING

Comedi's read and write file operation handlers (`comedi_read()` and
`comedi_write()`) currently call `copy_to_user()` or `copy_from_user()`
whilst in the `TASK_INTERRUPTIBLE` state, which falls foul of the
`might_fault()` checks when enabled.  Fix it by setting the current task
state back to `TASK_RUNNING` a bit earlier before calling these
functions.

Reported-by: Piotr Gregor <piotrgregor@rsyncme.org>
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Cc: <stable@vger.kernel.org> # 4.5+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedi_fops.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index ca11be21f64b..34ca7823255d 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -2396,6 +2396,7 @@ static ssize_t comedi_write(struct file *file, const char __user *buf,
 			continue;
 		}
 
+		set_current_state(TASK_RUNNING);
 		wp = async->buf_write_ptr;
 		n1 = min(n, async->prealloc_bufsz - wp);
 		n2 = n - n1;
@@ -2528,6 +2529,8 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
 			}
 			continue;
 		}
+
+		set_current_state(TASK_RUNNING);
 		rp = async->buf_read_ptr;
 		n1 = min(n, async->prealloc_bufsz - rp);
 		n2 = n - n1;

From c542942cb42186f99b6d715a833c7afad359f48f Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Wed, 12 Jul 2017 15:51:17 +0800
Subject: [PATCH 047/281] tcmu: Fix possible to/from address overflow when
 doing the memcpy

For most case the sg->length equals to PAGE_SIZE, so this bug won't
be triggered. Otherwise this will crash the kernel, for example when
all segments' sg->length equal to 1K.

Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 80ee130f8253..db29b5cd0b95 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -563,8 +563,6 @@ static int scatter_data_area(struct tcmu_dev *udev,
 					block_remaining);
 			to_offset = get_block_offset_user(udev, dbi,
 					block_remaining);
-			offset = DATA_BLOCK_SIZE - block_remaining;
-			to += offset;
 
 			if (*iov_cnt != 0 &&
 			    to_offset == iov_tail(*iov)) {
@@ -575,8 +573,10 @@ static int scatter_data_area(struct tcmu_dev *udev,
 				(*iov)->iov_len = copy_bytes;
 			}
 			if (copy_data) {
-				memcpy(to, from + sg->length - sg_remaining,
-					copy_bytes);
+				offset = DATA_BLOCK_SIZE - block_remaining;
+				memcpy(to + offset,
+				       from + sg->length - sg_remaining,
+				       copy_bytes);
 				tcmu_flush_dcache_range(to, copy_bytes);
 			}
 			sg_remaining -= copy_bytes;
@@ -637,9 +637,8 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
 			offset = DATA_BLOCK_SIZE - block_remaining;
-			from += offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
-			memcpy(to + sg->length - sg_remaining, from,
+			memcpy(to + sg->length - sg_remaining, from + offset,
 					copy_bytes);
 
 			sg_remaining -= copy_bytes;

From ededd039d1b96035b23592c049efcae53922cfce Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Fri, 14 Jul 2017 08:11:04 -0500
Subject: [PATCH 048/281] tcmu: free old string on reconfig

On initial tcmu_configure_device call the info->name would
have already been allocated and set, so on the second call
make sure to free it first.

Reported-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index db29b5cd0b95..942d094269fb 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1432,6 +1432,8 @@ static int tcmu_update_uio_info(struct tcmu_dev *udev)
 	if (udev->dev_config[0])
 		snprintf(str + used, size - used, "/%s", udev->dev_config);
 
+	/* If the old string exists, free it */
+	kfree(info->name);
 	info->name = str;
 
 	return 0;

From 66b59f9b1f41ee61eb4862bbcfc1e7db5298ba9e Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sun, 23 Jul 2017 20:03:07 +0530
Subject: [PATCH 049/281] cxgbit: add missing __kfree_skb()

Call __kfree_skb() after processing skb to avoid
memory leak.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/cxgbit/cxgbit_cm.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index e583dd8a418b..d4fa41be80f9 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -1510,11 +1510,13 @@ cxgbit_pass_open_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
 
 	if (!cnp) {
 		pr_info("%s stid %d lookup failure\n", __func__, stid);
-		return;
+		goto rel_skb;
 	}
 
 	cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status);
 	cxgbit_put_cnp(cnp);
+rel_skb:
+	__kfree_skb(skb);
 }
 
 static void
@@ -1530,11 +1532,13 @@ cxgbit_close_listsrv_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
 
 	if (!cnp) {
 		pr_info("%s stid %d lookup failure\n", __func__, stid);
-		return;
+		goto rel_skb;
 	}
 
 	cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status);
 	cxgbit_put_cnp(cnp);
+rel_skb:
+	__kfree_skb(skb);
 }
 
 static void
@@ -1819,12 +1823,16 @@ static void cxgbit_set_tcb_rpl(struct cxgbit_device *cdev, struct sk_buff *skb)
 	struct tid_info *t = lldi->tids;
 
 	csk = lookup_tid(t, tid);
-	if (unlikely(!csk))
+	if (unlikely(!csk)) {
 		pr_err("can't find connection for tid %u.\n", tid);
-	else
+		goto rel_skb;
+	} else {
 		cxgbit_wake_up(&csk->com.wr_wait, __func__, rpl->status);
+	}
 
 	cxgbit_put_csk(csk);
+rel_skb:
+	__kfree_skb(skb);
 }
 
 static void cxgbit_rx_data(struct cxgbit_device *cdev, struct sk_buff *skb)

From ea8dc5b4cd2195ee582cae28afa4164c6dea1738 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sun, 23 Jul 2017 20:03:33 +0530
Subject: [PATCH 050/281] iscsi-target: fix memory leak in
 iscsit_setup_text_cmd()

On receiving text request iscsi-target allocates buffer for
payload in iscsit_handle_text_cmd() and assigns buffer pointer
to cmd->text_in_ptr, this buffer is currently freed in
iscsit_release_cmd(), if iscsi-target sets 'C' bit in text
response then it will receive another text request from the
initiator with ttt != 0xffffffff in this case iscsi-target
will find cmd using itt and call iscsit_setup_text_cmd()
which will set cmd->text_in_ptr to NULL without freeing
previously allocated buffer.

This patch fixes this issue by calling kfree(cmd->text_in_ptr)
in iscsit_setup_text_cmd() before assigning NULL to it.

For the first text request cmd->text_in_ptr is NULL as
cmd is memset to 0 in iscsit_allocate_cmd().

Signed-off-by: Varun Prakash <varun@chelsio.com>
Cc: <stable@vger.kernel.org> # 4.0+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 74e4975dd1b1..2688918b879a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -2167,6 +2167,7 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	cmd->cmd_sn		= be32_to_cpu(hdr->cmdsn);
 	cmd->exp_stat_sn	= be32_to_cpu(hdr->exp_statsn);
 	cmd->data_direction	= DMA_NONE;
+	kfree(cmd->text_in_ptr);
 	cmd->text_in_ptr	= NULL;
 
 	return 0;

From 310d40a973c560a24c79f84cb5f16dc540a05686 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sun, 23 Jul 2017 20:03:45 +0530
Subject: [PATCH 051/281] iscsi-target: fix invalid flags in text response

In case of multiple text responses iscsi-target
sets both 'F' and 'C' bit for the final text response
pdu, this issue happens because hdr->flags is not
zeroed out before ORing with 'F' bit.

This patch removes the | operator to fix this issue.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 2688918b879a..12803de99400 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3488,9 +3488,9 @@ iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
 		return text_length;
 
 	if (completed) {
-		hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+		hdr->flags = ISCSI_FLAG_CMD_FINAL;
 	} else {
-		hdr->flags |= ISCSI_FLAG_TEXT_CONTINUE;
+		hdr->flags = ISCSI_FLAG_TEXT_CONTINUE;
 		cmd->read_data_done += text_length;
 		if (cmd->targ_xfer_tag == 0xFFFFFFFF)
 			cmd->targ_xfer_tag = session_get_next_ttt(conn->sess);

From d96adb9b076a12d30500347e2e667689062f44a0 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sat, 29 Jul 2017 21:01:49 +0530
Subject: [PATCH 052/281] cxgbit: fix sg_nents calculation

The current logic of calculating sg_nents can fail
if data_offset % PAGE_SIZE is not zero.

For example -
PAGE_SIZE = 4096
data_len = 3072
data_offset = 3072

As per current logic
sg_nents = max(1UL, DIV_ROUND_UP(data_len, PAGE_SIZE));
sg_nents = max(1UL, DIV_ROUND_UP(3072, 4096));
sg_nents = 1

But as data_offset % PAGE_SIZE = 3072 we should skip 3072 bytes
skip = 3K
sg_nents = max(1UL, DIV_ROUND_UP(3K(skip) + 3K(data_len), 4K(PAGE_SIZE));
sg_nents = 2;

This patch fixes this issue by adding skip to data_len.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/cxgbit/cxgbit_target.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c
index dda13f1af38e..514986b57c2d 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_target.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c
@@ -827,7 +827,7 @@ cxgbit_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 
 static void
 cxgbit_skb_copy_to_sg(struct sk_buff *skb, struct scatterlist *sg,
-		      unsigned int nents)
+		      unsigned int nents, u32 skip)
 {
 	struct skb_seq_state st;
 	const u8 *buf;
@@ -846,7 +846,7 @@ cxgbit_skb_copy_to_sg(struct sk_buff *skb, struct scatterlist *sg,
 		}
 
 		consumed += sg_pcopy_from_buffer(sg, nents, (void *)buf,
-						 buf_len, consumed);
+						 buf_len, skip + consumed);
 	}
 }
 
@@ -912,7 +912,7 @@ cxgbit_handle_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
 		struct scatterlist *sg = &cmd->se_cmd.t_data_sg[0];
 		u32 sg_nents = max(1UL, DIV_ROUND_UP(pdu_cb->dlen, PAGE_SIZE));
 
-		cxgbit_skb_copy_to_sg(csk->skb, sg, sg_nents);
+		cxgbit_skb_copy_to_sg(csk->skb, sg, sg_nents, 0);
 	}
 
 	cmd->write_data_done += pdu_cb->dlen;
@@ -1069,11 +1069,13 @@ static int cxgbit_handle_iscsi_dataout(struct cxgbit_sock *csk)
 		  cmd->se_cmd.data_length);
 
 	if (!(pdu_cb->flags & PDUCBF_RX_DATA_DDPD)) {
+		u32 skip = data_offset % PAGE_SIZE;
+
 		sg_off = data_offset / PAGE_SIZE;
 		sg_start = &cmd->se_cmd.t_data_sg[sg_off];
-		sg_nents = max(1UL, DIV_ROUND_UP(data_len, PAGE_SIZE));
+		sg_nents = max(1UL, DIV_ROUND_UP(skip + data_len, PAGE_SIZE));
 
-		cxgbit_skb_copy_to_sg(csk->skb, sg_start, sg_nents);
+		cxgbit_skb_copy_to_sg(csk->skb, sg_start, sg_nents, skip);
 	}
 
 check_payload:

From 6bcbb3174caa5f1ccc894f8ae077631659d5a629 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 30 Jun 2017 00:08:13 -0700
Subject: [PATCH 053/281] qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd use
 during TMR ABORT (v2)

This patch drops two incorrect usages of tcm_qla2xxx_free_cmd()
during TMR ABORT within tcm_qla2xxx_handle_data_work() and
tcm_qla2xxx_aborted_task(), which where attempting to dispatch
into workqueue context to do tcm_qla2xxx_complete_free() and
subsequently invoke transport_generic_free_cmd().

This is incorrect because during TMR ABORT target-core will
drop the outstanding se_cmd->cmd_kref references once it has
quiesced the se_cmd via transport_wait_for_tasks(), and in
the case of qla2xxx it should not attempt to do it's own
transport_generic_free_cmd() once the abort has occured.

As reported by Pascal, this was originally manifesting as a
BUG_ON(cmd->cmd_in_wq) in qlt_free_cmd() during TMR ABORT,
with a LIO backend that had sufficently high enough WRITE
latency to trigger a host side TMR ABORT_TASK.

(v2: Drop the qla_tgt_cmd->write_pending_abort_comp changes,
     as they will be addressed in a seperate series)

Reported-by: Pascal de Bruijn <p.debruijn@unilogic.nl>
Tested-by: Pascal de Bruijn <p.debruijn@unilogic.nl>
Cc: Pascal de Bruijn <p.debruijn@unilogic.nl>
Reported-by: Lukasz Engel <lukasz.engel@softax.pl>
Cc: Lukasz Engel <lukasz.engel@softax.pl>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: Quinn Tran <quinn.tran@cavium.com>
Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/qla2xxx/tcm_qla2xxx.c | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index b20da0d27ad7..3f82ea1b72dc 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -500,7 +500,6 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd,
 static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 {
 	struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
-	unsigned long flags;
 
 	/*
 	 * Ensure that the complete FCP WRITE payload has been received.
@@ -508,17 +507,6 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 	 */
 	cmd->cmd_in_wq = 0;
 
-	spin_lock_irqsave(&cmd->cmd_lock, flags);
-	cmd->data_work = 1;
-	if (cmd->aborted) {
-		cmd->data_work_free = 1;
-		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
-		tcm_qla2xxx_free_cmd(cmd);
-		return;
-	}
-	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
 	cmd->qpair->tgt_counters.qla_core_ret_ctio++;
 	if (!cmd->write_data_transferred) {
 		/*
@@ -765,31 +753,13 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd)
 	qlt_xmit_tm_rsp(mcmd);
 }
 
-#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free)
 static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
 {
 	struct qla_tgt_cmd *cmd = container_of(se_cmd,
 				struct qla_tgt_cmd, se_cmd);
-	unsigned long flags;
 
 	if (qlt_abort_cmd(cmd))
 		return;
-
-	spin_lock_irqsave(&cmd->cmd_lock, flags);
-	if ((cmd->state == QLA_TGT_STATE_NEW)||
-	    ((cmd->state == QLA_TGT_STATE_DATA_IN) &&
-		DATA_WORK_NOT_FREE(cmd))) {
-		cmd->data_work_free = 1;
-		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-		/*
-		 * cmd has not reached fw, Use this trigger to free it.
-		 */
-		tcm_qla2xxx_free_cmd(cmd);
-		return;
-	}
-	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-	return;
-
 }
 
 static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,

From 54e22f265e872ae140755b3318521d400a094605 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 6 Jul 2017 07:02:25 +0200
Subject: [PATCH 054/281] batman-adv: fix TT sync flag inconsistencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes an issue in the translation table code potentially
leading to a TT Request + Response storm. The issue may occur for nodes
involving BLA and an inconsistent configuration of the batman-adv AP
isolation feature. However, since the new multicast optimizations, a
single, malformed packet may lead to a mesh-wide, persistent
Denial-of-Service, too.

The issue occurs because nodes are currently OR-ing the TT sync flags of
all originators announcing a specific MAC address via the
translation table. When an intermediate node now receives a TT Request
and wants to answer this on behalf of the destination node, then this
intermediate node now responds with an altered flag field and broken
CRC. The next OGM of the real destination will lead to a CRC mismatch
and triggering a TT Request and Response again.

Furthermore, the OR-ing is currently never undone as long as at least
one originator announcing the according MAC address remains, leading to
the potential persistency of this issue.

This patch fixes this issue by storing the flags used in the CRC
calculation on a a per TT orig entry basis to be able to respond with
the correct, original flags in an intermediate TT Response for one
thing. And to be able to correctly unset sync flags once all nodes
announcing a sync flag vanish for another.

Fixes: e9c00136a475 ("batman-adv: fix tt_global_entries flags update")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Acked-by: Antonio Quartulli <a@unstable.cc>
[sw: typo in commit message]
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/translation-table.c | 60 +++++++++++++++++++++++++-----
 net/batman-adv/types.h             |  2 +
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e1133bc634b5..8a3ce79b1307 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1549,9 +1549,41 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
 	return found;
 }
 
+/**
+ * batadv_tt_global_sync_flags - update TT sync flags
+ * @tt_global: the TT global entry to update sync flags in
+ *
+ * Updates the sync flag bits in the tt_global flag attribute with a logical
+ * OR of all sync flags from any of its TT orig entries.
+ */
+static void
+batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global)
+{
+	struct batadv_tt_orig_list_entry *orig_entry;
+	const struct hlist_head *head;
+	u16 flags = BATADV_NO_FLAGS;
+
+	rcu_read_lock();
+	head = &tt_global->orig_list;
+	hlist_for_each_entry_rcu(orig_entry, head, list)
+		flags |= orig_entry->flags;
+	rcu_read_unlock();
+
+	flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK);
+	tt_global->common.flags = flags;
+}
+
+/**
+ * batadv_tt_global_orig_entry_add - add or update a TT orig entry
+ * @tt_global: the TT global entry to add an orig entry in
+ * @orig_node: the originator to add an orig entry for
+ * @ttvn: translation table version number of this changeset
+ * @flags: TT sync flags
+ */
 static void
 batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
-				struct batadv_orig_node *orig_node, int ttvn)
+				struct batadv_orig_node *orig_node, int ttvn,
+				u8 flags)
 {
 	struct batadv_tt_orig_list_entry *orig_entry;
 
@@ -1561,7 +1593,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
 		 * was added during a "temporary client detection"
 		 */
 		orig_entry->ttvn = ttvn;
-		goto out;
+		orig_entry->flags = flags;
+		goto sync_flags;
 	}
 
 	orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
@@ -1573,6 +1606,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
 	batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
 	orig_entry->orig_node = orig_node;
 	orig_entry->ttvn = ttvn;
+	orig_entry->flags = flags;
 	kref_init(&orig_entry->refcount);
 
 	spin_lock_bh(&tt_global->list_lock);
@@ -1582,6 +1616,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
 	spin_unlock_bh(&tt_global->list_lock);
 	atomic_inc(&tt_global->orig_list_count);
 
+sync_flags:
+	batadv_tt_global_sync_flags(tt_global);
 out:
 	if (orig_entry)
 		batadv_tt_orig_list_entry_put(orig_entry);
@@ -1703,10 +1739,10 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
 		}
 
 		/* the change can carry possible "attribute" flags like the
-		 * TT_CLIENT_WIFI, therefore they have to be copied in the
+		 * TT_CLIENT_TEMP, therefore they have to be copied in the
 		 * client entry
 		 */
-		common->flags |= flags;
+		common->flags |= flags & (~BATADV_TT_SYNC_MASK);
 
 		/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
 		 * one originator left in the list and we previously received a
@@ -1723,7 +1759,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
 	}
 add_orig_entry:
 	/* add the new orig_entry (if needed) or update it */
-	batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
+	batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn,
+					flags & BATADV_TT_SYNC_MASK);
 
 	batadv_dbg(BATADV_DBG_TT, bat_priv,
 		   "Creating new global tt entry: %pM (vid: %d, via %pM)\n",
@@ -1946,6 +1983,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 			       struct batadv_tt_orig_list_entry *orig,
 			       bool best)
 {
+	u16 flags = (common->flags & (~BATADV_TT_SYNC_MASK)) | orig->flags;
 	void *hdr;
 	struct batadv_orig_node_vlan *vlan;
 	u8 last_ttvn;
@@ -1975,7 +2013,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 	    nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
 	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
 	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
-	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, flags))
 		goto nla_put_failure;
 
 	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
@@ -2589,6 +2627,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
 				unsigned short vid)
 {
 	struct batadv_hashtable *hash = bat_priv->tt.global_hash;
+	struct batadv_tt_orig_list_entry *tt_orig;
 	struct batadv_tt_common_entry *tt_common;
 	struct batadv_tt_global_entry *tt_global;
 	struct hlist_head *head;
@@ -2627,8 +2666,9 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
 			/* find out if this global entry is announced by this
 			 * originator
 			 */
-			if (!batadv_tt_global_entry_has_orig(tt_global,
-							     orig_node))
+			tt_orig = batadv_tt_global_orig_entry_find(tt_global,
+								   orig_node);
+			if (!tt_orig)
 				continue;
 
 			/* use network order to read the VID: this ensures that
@@ -2640,10 +2680,12 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
 			/* compute the CRC on flags that have to be kept in sync
 			 * among nodes
 			 */
-			flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+			flags = tt_orig->flags;
 			crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
 
 			crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
+
+			batadv_tt_orig_list_entry_put(tt_orig);
 		}
 		rcu_read_unlock();
 	}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ea43a6449247..a62795868794 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1260,6 +1260,7 @@ struct batadv_tt_global_entry {
  * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
  * @orig_node: pointer to orig node announcing this non-mesh client
  * @ttvn: translation table version number which added the non-mesh client
+ * @flags: per orig entry TT sync flags
  * @list: list node for batadv_tt_global_entry::orig_list
  * @refcount: number of contexts the object is used
  * @rcu: struct used for freeing in an RCU-safe manner
@@ -1267,6 +1268,7 @@ struct batadv_tt_global_entry {
 struct batadv_tt_orig_list_entry {
 	struct batadv_orig_node *orig_node;
 	u8 ttvn;
+	u8 flags;
 	struct hlist_node list;
 	struct kref refcount;
 	struct rcu_head rcu;

From b962e2cd357f2a529fe14387092401e6552f43d6 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sat, 1 Jul 2017 11:45:36 +0800
Subject: [PATCH 055/281] pinctrl: zte: fix dereference of 'data' in
 zx_set_mux()

It fixes the following Smatch complaint:

drivers/pinctrl/zte/pinctrl-zx.c:76 zx_set_mux()
warn: variable dereferenced before check 'data' (see line 67)

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: cbff0c4d27f4 ("pinctrl: add ZTE ZX pinctrl driver support")
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/zte/pinctrl-zx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/zte/pinctrl-zx.c b/drivers/pinctrl/zte/pinctrl-zx.c
index 787e3967bd5c..f828ee340a98 100644
--- a/drivers/pinctrl/zte/pinctrl-zx.c
+++ b/drivers/pinctrl/zte/pinctrl-zx.c
@@ -64,10 +64,8 @@ static int zx_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector,
 	struct zx_pinctrl_soc_info *info = zpctl->info;
 	const struct pinctrl_pin_desc *pindesc = info->pins + group_selector;
 	struct zx_pin_data *data = pindesc->drv_data;
-	struct zx_mux_desc *mux = data->muxes;
-	u32 mask = (1 << data->width) - 1;
-	u32 offset = data->offset;
-	u32 bitpos = data->bitpos;
+	struct zx_mux_desc *mux;
+	u32 mask, offset, bitpos;
 	struct function_desc *func;
 	unsigned long flags;
 	u32 val, mval;
@@ -76,6 +74,11 @@ static int zx_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector,
 	if (!data)
 		return -EINVAL;
 
+	mux = data->muxes;
+	mask = (1 << data->width) - 1;
+	offset = data->offset;
+	bitpos = data->bitpos;
+
 	func = pinmux_generic_get_function(pctldev, func_selector);
 	if (!func)
 		return -EINVAL;

From 65ff135b4f748f7d5f0d6a21b860e88a0d1fb7c5 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Thu, 6 Jul 2017 15:54:23 +0900
Subject: [PATCH 056/281] pinctrl: uniphier: fix USB3 pin assignment for Pro4

According to pinctrl assignment for Pro4, each definition of USB#2 and
USB#3 are as follows.

  184: USB2VBUS
  185: USB2OD
  186: USB2ID
  187: USB3VBUS
  188: USB3OD

USB#2 has an additional pin "USB2ID", but the chip doesn't use this pin
while in host-mode. Considering this pin, the pin definitions for USB#3
should be {187, 188}.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
index a433a306a2d0..c75e094b2d90 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
@@ -1084,7 +1084,7 @@ static const unsigned usb1_pins[] = {182, 183};
 static const int usb1_muxvals[] = {0, 0};
 static const unsigned usb2_pins[] = {184, 185};
 static const int usb2_muxvals[] = {0, 0};
-static const unsigned usb3_pins[] = {186, 187};
+static const unsigned usb3_pins[] = {187, 188};
 static const int usb3_muxvals[] = {0, 0};
 static const unsigned port_range0_pins[] = {
 	300, 301, 302, 303, 304, 305, 306, 307,		/* PORT0x */

From c64ffff7a9d1eee6624d3eaab36968fe6df31a9f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 Jul 2017 17:13:28 +0300
Subject: [PATCH 057/281] i2c: core: Allow empty id_table in ACPI case as well

For now empty ID table is not allowed with ACPI and prevents driver to
be probed.

Add a check to allow empty ID table.

This introduces a helper i2c_acpi_match_device().

Note, we rename some static function in i2c-core-acpi.c to distinguish
with public API.

Fixes: da10c06a044b ("i2c: Make I2C ID tables non-mandatory for DT'ed devices")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Rajmohan Mani <rajmohan.mani@intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
[wsa: needed to get some drivers probed again]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-acpi.c | 19 +++++++++++++++----
 drivers/i2c/i2c-core-base.c |  1 +
 drivers/i2c/i2c-core.h      |  9 +++++++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 4842ec3a5451..a9126b3cda61 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -230,6 +230,16 @@ void i2c_acpi_register_devices(struct i2c_adapter *adap)
 		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
 }
 
+const struct acpi_device_id *
+i2c_acpi_match_device(const struct acpi_device_id *matches,
+		      struct i2c_client *client)
+{
+	if (!(client && matches))
+		return NULL;
+
+	return acpi_match_device(matches, &client->dev);
+}
+
 static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
 					   void *data, void **return_value)
 {
@@ -289,7 +299,7 @@ u32 i2c_acpi_find_bus_speed(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
 
-static int i2c_acpi_match_adapter(struct device *dev, void *data)
+static int i2c_acpi_find_match_adapter(struct device *dev, void *data)
 {
 	struct i2c_adapter *adapter = i2c_verify_adapter(dev);
 
@@ -299,7 +309,7 @@ static int i2c_acpi_match_adapter(struct device *dev, void *data)
 	return ACPI_HANDLE(dev) == (acpi_handle)data;
 }
 
-static int i2c_acpi_match_device(struct device *dev, void *data)
+static int i2c_acpi_find_match_device(struct device *dev, void *data)
 {
 	return ACPI_COMPANION(dev) == data;
 }
@@ -309,7 +319,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
 	struct device *dev;
 
 	dev = bus_find_device(&i2c_bus_type, NULL, handle,
-			      i2c_acpi_match_adapter);
+			      i2c_acpi_find_match_adapter);
 	return dev ? i2c_verify_adapter(dev) : NULL;
 }
 
@@ -317,7 +327,8 @@ static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device);
+	dev = bus_find_device(&i2c_bus_type, NULL, adev,
+			      i2c_acpi_find_match_device);
 	return dev ? i2c_verify_client(dev) : NULL;
 }
 
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index c89dac7fd2e7..12822a4b8f8f 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -357,6 +357,7 @@ static int i2c_device_probe(struct device *dev)
 	 * Tree match table entry is supplied for the probing device.
 	 */
 	if (!driver->id_table &&
+	    !i2c_acpi_match_device(dev->driver->acpi_match_table, client) &&
 	    !i2c_of_match_device(dev->driver->of_match_table, client))
 		return -ENODEV;
 
diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 3b63f5e5b89c..3d3d9bf02101 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h
@@ -31,9 +31,18 @@ int i2c_check_addr_validity(unsigned addr, unsigned short flags);
 int i2c_check_7bit_addr_validity_strict(unsigned short addr);
 
 #ifdef CONFIG_ACPI
+const struct acpi_device_id *
+i2c_acpi_match_device(const struct acpi_device_id *matches,
+		      struct i2c_client *client);
 void i2c_acpi_register_devices(struct i2c_adapter *adap);
 #else /* CONFIG_ACPI */
 static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { }
+static inline const struct acpi_device_id *
+i2c_acpi_match_device(const struct acpi_device_id *matches,
+		      struct i2c_client *client)
+{
+	return NULL;
+}
 #endif /* CONFIG_ACPI */
 extern struct notifier_block i2c_acpi_notifier;
 

From d81ece747d8727bb8b1cfc9a20dbe62f09a4e35a Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sat, 22 Jul 2017 10:50:53 +0800
Subject: [PATCH 058/281] pinctrl: sunxi: add a missing function of A10/A20
 pinctrl driver

The PH16 pin has a function with mux id 0x5, which is the DET pin of the
"sim" (smart card reader) IP block.

This function is missing in old versions of A10/A20 SoCs' datasheets and
user manuals, so it's also missing in the old drivers. The newest A10
Datasheet V1.70 and A20 Datasheet V1.41 contain this pin function, and
it's discovered during implementing R40 pinctrl driver.

Add it to the driver. As we now merged A20 pinctrl driver to the A10
one, we need to only fix the A10 driver now.

Fixes: f2821b1ca3a2 ("pinctrl: sunxi: Move Allwinner A10 pinctrl
driver to a driver of its own")

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c
index 159580c04b14..47a392bc73c8 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c
@@ -918,6 +918,7 @@ static const struct sunxi_desc_pin sun4i_a10_pins[] = {
 		  SUNXI_FUNCTION_VARIANT(0x3, "emac",	/* ETXD1 */
 					 PINCTRL_SUN7I_A20),
 		  SUNXI_FUNCTION(0x4, "keypad"),	/* IN6 */
+		  SUNXI_FUNCTION(0x5, "sim"),		/* DET */
 		  SUNXI_FUNCTION_IRQ(0x6, 16),		/* EINT16 */
 		  SUNXI_FUNCTION(0x7, "csi1")),		/* D16 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 17),

From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Jul 2017 22:29:51 +0100
Subject: [PATCH 059/281] dma-buf/sync_file: Allow multiple sync_files to wrap
 a single dma-fence

Up until recently sync_file were create to export a single dma-fence to
userspace, and so we could canabalise a bit insie dma-fence to mark
whether or not we had enable polling for the sync_file itself. However,
with the advent of syncobj, we do allow userspace to create multiple
sync_files for a single dma-fence. (Similarly, that the sw-sync
validation framework also started returning multiple sync-files wrapping
a single dma-fence for a syncpt also triggering the problem.)

This patch reverts my suggestion in commit e24165537312
("dma-buf/sync_file: only enable fence signalling on poll()") to use a
single bit in the shared dma-fence and restores the sync_file->flags for
tracking the bits individually.

Reported-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline")
Fixes: e9083420bbac ("drm: introduce sync objects (v4)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: dri-devel@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk
(cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a)
---
 drivers/dma-buf/sync_file.c | 5 +++--
 include/linux/sync_file.h   | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index d7e219d2669d..66fb40d0ebdb 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file)
 {
 	struct sync_file *sync_file = file->private_data;
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
+	if (test_bit(POLL_ENABLED, &sync_file->flags))
 		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
@@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
+	if (list_empty(&sync_file->cb.node) &&
+	    !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) {
 		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
 					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 5726107963b2..0ad87c434ae6 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -43,12 +43,13 @@ struct sync_file {
 #endif
 
 	wait_queue_head_t	wq;
+	unsigned long		flags;
 
 	struct dma_fence	*fence;
 	struct dma_fence_cb cb;
 };
 
-#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
+#define POLL_ENABLED 0
 
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);

From 22acc37b8681461707a3fc73505af808f9af8260 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 13 Jul 2017 15:45:01 +0200
Subject: [PATCH 060/281] i2c: designware: Print clock freq on invalid clock
 freq error

When we refuse to probe due to an invalid clock frequency, log
the frequency which is causing this error.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 2ea6d0d25a01..d139b156f9c9 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -319,7 +319,8 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	if (dev->clk_freq != 100000 && dev->clk_freq != 400000
 	    && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) {
 		dev_err(&pdev->dev,
-			"Only 100kHz, 400kHz, 1MHz and 3.4MHz supported");
+			"%d Hz is unsupported, only 100kHz, 400kHz, 1MHz and 3.4MHz are supported\n",
+			dev->clk_freq);
 		ret = -EINVAL;
 		goto exit_reset;
 	}

From 682c6c2188f39d13548ccdc89c9888fbcb547889 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 13 Jul 2017 15:45:02 +0200
Subject: [PATCH 061/281] i2c: designware: Some broken DSTDs use 1MiHz instead
 of 1MHz

At least the Acer Iconia Tab8 / aka W1-810 uses 1MiHz instead of
1MHz for one of its busses, fix this up to 1MHz instead of failing
the probe of that bus.

This fixes the accelerometer on the Acer Iconia Tab8 not working.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index d139b156f9c9..143a8fd582b4 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -298,6 +298,9 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	}
 
 	acpi_speed = i2c_acpi_find_bus_speed(&pdev->dev);
+	/* Some broken DSTDs use 1MiHz instead of 1MHz */
+	if (acpi_speed == 1048576)
+		acpi_speed = 1000000;
 	/*
 	 * Find bus speed from the "clock-frequency" device property, ACPI
 	 * or by using fast mode if neither is set.

From 5871a1538cd2d7a4a18e5e21adc7e9e6dab702e2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 26 Jul 2017 11:03:49 +0100
Subject: [PATCH 062/281] i2c: allow i2c-versatile for ARM MPS platforms

Allow i2c-versatile to be enabled for ARM MPS platforms.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 1006b230b236..65fa29591d21 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -983,7 +983,7 @@ config I2C_UNIPHIER_F
 
 config I2C_VERSATILE
 	tristate "ARM Versatile/Realview I2C bus support"
-	depends on ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || COMPILE_TEST
+	depends on ARCH_MPS2 || ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || COMPILE_TEST
 	select I2C_ALGOBIT
 	help
 	  Say yes if you want to support the I2C serial bus on ARMs Versatile

From 9c80034921d1ece5c0e3241ba1645bce32387684 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sat, 29 Jul 2017 14:11:43 +0200
Subject: [PATCH 063/281] i2c: rephrase explanation of I2C_CLASS_DEPRECATED

Hopefully making clear that it is not needed for new drivers.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 00ca5b86a753..d501d3956f13 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -689,7 +689,8 @@ i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLASS_HWMON		(1<<0)	/* lm_sensors, ... */
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
 #define I2C_CLASS_SPD		(1<<7)	/* Memory modules */
-#define I2C_CLASS_DEPRECATED	(1<<8)	/* Warn users that adapter will stop using classes */
+/* Warn users that the adapter doesn't support classes anymore */
+#define I2C_CLASS_DEPRECATED	(1<<8)
 
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU

From 5ae29649e03f58be0f412c21b62b203aa7cf1680 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 31 Jul 2017 18:45:41 +0200
Subject: [PATCH 064/281] video: fbdev: imxfb: use after free in imxfb_remove()

We free "info" then dereference it on the next line.  Really this whole
function would be better if we wrote it to unwind in the mirror of how
things are allocated in the probe.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Alexander Shiyan <shc_work@mail.ru>
Cc: Sascha Hauer <kernel@pengutronix.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/imxfb.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index c166e0725be5..ba82f97fb42b 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -1073,20 +1073,16 @@ static int imxfb_remove(struct platform_device *pdev)
 	imxfb_disable_controller(fbi);
 
 	unregister_framebuffer(info);
-
+	fb_dealloc_cmap(&info->cmap);
 	pdata = dev_get_platdata(&pdev->dev);
 	if (pdata && pdata->exit)
 		pdata->exit(fbi->pdev);
-
-	fb_dealloc_cmap(&info->cmap);
-	kfree(info->pseudo_palette);
-	framebuffer_release(info);
-
 	dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
 		    fbi->map_dma);
-
 	iounmap(fbi->regs);
 	release_mem_region(res->start, resource_size(res));
+	kfree(info->pseudo_palette);
+	framebuffer_release(info);
 
 	return 0;
 }

From 7b4dfbe7880cf2092db3c94c6a34ae6ffa8aa344 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Jul 2017 18:45:41 +0200
Subject: [PATCH 065/281] fbdev: omapfb: remove unused variable

Removing the default display name left a harmless warning:

fbdev/omap2/omapfb/dss/core.c: In function 'omap_dss_probe':
fbdev/omap2/omapfb/dss/core.c:196:30: error: unused variable 'pdata' [-Werror=unused-variable]

This removes the now-unused variable as well.

Fixes: 278cba7eaf54 ("drm: omapdrm: Remove unused default display name support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap2/omapfb/dss/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/core.c b/drivers/video/fbdev/omap2/omapfb/dss/core.c
index eecf695c16f4..09e5bb013d28 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/core.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/core.c
@@ -193,7 +193,6 @@ static struct notifier_block omap_dss_pm_notif_block = {
 
 static int __init omap_dss_probe(struct platform_device *pdev)
 {
-	struct omap_dss_board_info *pdata = pdev->dev.platform_data;
 	int r;
 
 	core.pdev = pdev;

From dd0c41f8a7e0babdadc61d5201ac8505a79dec05 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 31 Jul 2017 18:45:41 +0200
Subject: [PATCH 066/281] efifb: allow user to disable write combined mapping.

This patch allows the user to disable write combined mapping
of the efifb framebuffer console using an nowc option.

A customer noticed major slowdowns while logging to the console
with write combining enabled, on other tasks running on the same
CPU. (10x or greater slow down on all other cores on the same CPU
as is doing the logging).

I reproduced this on a machine with dual CPUs.
Intel(R) Xeon(R) CPU E5-2609 v3 @ 1.90GHz (6 core)

I wrote a test that just mmaps the pci bar and writes to it in
a loop, while this was running in the background one a single
core with (taskset -c 1), building a kernel up to init/version.o
(taskset -c 8) went from 13s to 133s or so. I've yet to explain
why this occurs or what is going wrong I haven't managed to find
a perf command that in any way gives insight into this.

    11,885,070,715      instructions              #    1.39  insns per cycle
vs
    12,082,592,342      instructions              #    0.13  insns per cycle

is the only thing I've spotted of interest, I've tried at least:
dTLB-stores,dTLB-store-misses,L1-dcache-stores,LLC-store,LLC-store-misses,LLC-load-misses,LLC-loads,\mem-loads,mem-stores,iTLB-loads,iTLB-load-misses,cache-references,cache-misses

For now it seems at least a good idea to allow a user to disable write
combining if they see this until we can figure it out.

Note also most users get a real framebuffer driver loaded when kms
kicks in, it just happens on these machines the kernel didn't support
the gpu specific driver.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Acked-by: Peter Jones <pjones@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 Documentation/fb/efifb.txt  | 6 ++++++
 drivers/video/fbdev/efifb.c | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/Documentation/fb/efifb.txt b/Documentation/fb/efifb.txt
index a59916c29b33..1a85c1bdaf38 100644
--- a/Documentation/fb/efifb.txt
+++ b/Documentation/fb/efifb.txt
@@ -27,5 +27,11 @@ You have to add the following kernel parameters in your elilo.conf:
 	Macbook Pro 17", iMac 20" :
 		video=efifb:i20
 
+Accepted options:
+
+nowc	Don't map the framebuffer write combined. This can be used
+	to workaround side-effects and slowdowns on other CPU cores
+	when large amounts of console data are written.
+
 --
 Edgar Hucek <gimli@dark-green.com>
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index ff01bed7112f..1e784adb89b1 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -17,6 +17,7 @@
 #include <asm/efi.h>
 
 static bool request_mem_succeeded = false;
+static bool nowc = false;
 
 static struct fb_var_screeninfo efifb_defined = {
 	.activate		= FB_ACTIVATE_NOW,
@@ -99,6 +100,8 @@ static int efifb_setup(char *options)
 				screen_info.lfb_height = simple_strtoul(this_opt+7, NULL, 0);
 			else if (!strncmp(this_opt, "width:", 6))
 				screen_info.lfb_width = simple_strtoul(this_opt+6, NULL, 0);
+			else if (!strcmp(this_opt, "nowc"))
+				nowc = true;
 		}
 	}
 
@@ -255,7 +258,10 @@ static int efifb_probe(struct platform_device *dev)
 	info->apertures->ranges[0].base = efifb_fix.smem_start;
 	info->apertures->ranges[0].size = size_remap;
 
-	info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len);
+	if (nowc)
+		info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len);
+	else
+		info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len);
 	if (!info->screen_base) {
 		pr_err("efifb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n",
 			efifb_fix.smem_len, efifb_fix.smem_start);

From 9527b82ae3af1ebf465506868fb55e7f862cd9da Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 1 Aug 2017 12:24:17 +0100
Subject: [PATCH 067/281] Revert "serial: Delete dead code for CIR serial
 ports"

This reverts commit 1104321a7b3bb670dc614ffa7958c553e7b3b836.

The code is not dead at all and breaks winbond-cir.

Serial: 8250/16550 driver, 32 ports, IRQ sharing enabled
00:02: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A
00:03: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a CIR port
lirc lirc0: lirc_dev: driver ir-lirc-codec (winbond-cir) registered at minor = 0
winbond-cir 00:03: Region 0x2f8-0x2ff already in use!
winbond-cir: probe of 00:03 failed with error -16

Signed-off-by: Sean Young <sean@mess.org>
Reviewed-by: Matthias Brugger <mbrugger@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index b5def356af63..1aab3010fbfa 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1043,13 +1043,24 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 		if (up->dl_write)
 			uart->dl_write = up->dl_write;
 
-		if (serial8250_isa_config != NULL)
-			serial8250_isa_config(0, &uart->port,
-					&uart->capabilities);
+		if (uart->port.type != PORT_8250_CIR) {
+			if (serial8250_isa_config != NULL)
+				serial8250_isa_config(0, &uart->port,
+						&uart->capabilities);
 
-		ret = uart_add_one_port(&serial8250_reg, &uart->port);
-		if (ret == 0)
-			ret = uart->port.line;
+			ret = uart_add_one_port(&serial8250_reg,
+						&uart->port);
+			if (ret == 0)
+				ret = uart->port.line;
+		} else {
+			dev_info(uart->port.dev,
+				"skipping CIR port at 0x%lx / 0x%llx, IRQ %d\n",
+				uart->port.iobase,
+				(unsigned long long)uart->port.mapbase,
+				uart->port.irq);
+
+			ret = 0;
+		}
 	}
 	mutex_unlock(&serial_mutex);
 

From 7f81e55c737a8fa82c71f290945d729a4902f8d2 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 1 Aug 2017 11:02:46 -0700
Subject: [PATCH 068/281] xtensa: don't limit csum_partial export by CONFIG_NET

csum_partial and csum_partial_copy_generic are defined unconditionally
and are available even when CONFIG_NET is disabled. They are used not
only by the network drivers, but also by scsi and media.
Don't limit these functions export by CONFIG_NET.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/xtensa_ksyms.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index d159e9b9c018..672391003e40 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v)
 }
 EXPORT_SYMBOL(__sync_fetch_and_or_4);
 
-#ifdef CONFIG_NET
 /*
  * Networking support
  */
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
-#endif /* CONFIG_NET */
 
 /*
  * Architecture-specific symbols

From bc652eb6a0d5cffaea7dc8e8ad488aab2a1bf1ed Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 1 Aug 2017 11:15:15 -0700
Subject: [PATCH 069/281] xtensa: mm/cache: add missing EXPORT_SYMBOLs

Functions clear_user_highpage, copy_user_highpage, flush_dcache_page,
local_flush_cache_range and local_flush_cache_page may be used from
modules. Export them.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/mm/cache.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index dbb1cdef3663..3c75c4e597da 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr)
 	clear_page_alias(kvaddr, paddr);
 	preempt_enable();
 }
+EXPORT_SYMBOL(clear_user_highpage);
 
 void copy_user_highpage(struct page *dst, struct page *src,
 			unsigned long vaddr, struct vm_area_struct *vma)
@@ -119,6 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src,
 	copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr);
 	preempt_enable();
 }
+EXPORT_SYMBOL(copy_user_highpage);
 
 /*
  * Any time the kernel writes to a user page cache page, or it is about to
@@ -172,7 +174,7 @@ void flush_dcache_page(struct page *page)
 
 	/* There shouldn't be an entry in the cache for this page anymore. */
 }
-
+EXPORT_SYMBOL(flush_dcache_page);
 
 /*
  * For now, flush the whole cache. FIXME??
@@ -184,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma,
 	__flush_invalidate_dcache_all();
 	__invalidate_icache_all();
 }
+EXPORT_SYMBOL(local_flush_cache_range);
 
 /* 
  * Remove any entry in the cache for this page. 
@@ -203,6 +206,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
 	__flush_invalidate_dcache_page_alias(virt, phys);
 	__invalidate_icache_page_alias(virt, phys);
 }
+EXPORT_SYMBOL(local_flush_cache_page);
 
 #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
 

From fd1b8668af59a11bb754a6c9b0051c6c5ce73b74 Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Wed, 2 Aug 2017 00:45:06 +0900
Subject: [PATCH 070/281] USB: serial: option: add D-Link DWM-222 device ID

Add device id for D-Link DWM-222.

Cc: stable@vger.kernel.org
Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index ebe51f11105d..fe123153b1a5 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2025,6 +2025,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) },			/* D-Link DWM-158 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff),			/* D-Link DWM-221 B1 */
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff),			/* D-Link DWM-222 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */

From d490c9cd2f67399e1dbc951f190d03724b81d0c8 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 29 Jun 2017 14:49:59 +0530
Subject: [PATCH 071/281] drm/msm/mdp5: Fix compilation warnings

Following compilation warnings were observed for these files:

  CC [M]  drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.o
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c: In function 'blend_setup':
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: missing braces around initializer [-Wmissing-braces]
  enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
       ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: (near initialization for 'stage[0]') [-Wmissing-braces]
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: missing braces around initializer [-Wmissing-braces]
  enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
       ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: (near initialization for 'r_stage[0]') [-Wmissing-braces]

drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c: In function 'mdp5_plane_mode_set':
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: missing braces around initializer [-Wmissing-braces]
  struct phase_step step = { 0 };
         ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: (near initialization for 'step.x') [-Wmissing-braces]
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: missing braces around initializer [-Wmissing-braces]
  struct pixel_ext pe = { 0 };
         ^
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: (near initialization for 'pe.left') [-Wmissing-braces]

This happens because in the first case we were initializing a two
dimensional array with {0} and in the second case we were initializing a
struct containing two arrays with {0}.

Fix them by adding another pair of {}.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c  | 4 ++--
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index cb5415d6c04b..62a0fb377e7a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -221,8 +221,8 @@ static void blend_setup(struct drm_crtc *crtc)
 	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	uint32_t blend_op, fg_alpha, bg_alpha, ctl_blend_flags = 0;
 	unsigned long flags;
-	enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
-	enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
+	enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } };
+	enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } };
 	int i, plane_cnt = 0;
 	bool bg_alpha_enabled = false;
 	u32 mixer_op_mode = 0;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index fe3a4de1a433..61f39c86dd09 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -890,8 +890,8 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	struct mdp5_hw_pipe *right_hwpipe;
 	const struct mdp_format *format;
 	uint32_t nplanes, config = 0;
-	struct phase_step step = { 0 };
-	struct pixel_ext pe = { 0 };
+	struct phase_step step = { { 0 } };
+	struct pixel_ext pe = { { 0 } };
 	uint32_t hdecm = 0, vdecm = 0;
 	uint32_t pix_format;
 	unsigned int rotation;

From 65e93108891e571f177c202add9288eda9ac4100 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 30 Jun 2017 10:59:15 +0300
Subject: [PATCH 072/281] drm/msm: fix an integer overflow test

We recently added an integer overflow check but it needs an additional
tweak to work properly on 32 bit systems.

The problem is that we're doing the right hand side of the assignment as
type unsigned long so the max it will have an integer overflow instead
of being larger than SIZE_MAX.  That means the "sz > SIZE_MAX" condition
is never true even on 32 bit systems.  We need to first cast it to u64
and then do the math.

Fixes: 4a630fadbb29 ("drm/msm: Fix potential buffer overflow issue")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 6bfca7470141..8095658e8cb4 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -34,8 +34,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 		struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds)
 {
 	struct msm_gem_submit *submit;
-	uint64_t sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) +
-		(nr_cmds * sizeof(submit->cmd[0]));
+	uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) +
+		((u64)nr_cmds * sizeof(submit->cmd[0]));
 
 	if (sz > SIZE_MAX)
 		return NULL;

From 71e3dfa167b105d3c06e76fee1d0e2fd1e502cf6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 10 Jul 2017 10:20:42 +0300
Subject: [PATCH 073/281] drm/msm: unlock on error in msm_gem_get_iova()

We recently added locking to this function but there was a direct return
that was overlooked where we need to unlock.

Fixes: 0e08270a1f01 ("drm/msm: Separate locking of buffer resources from struct_mutex")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 65f35544c1ec..065d933df2c3 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -383,8 +383,10 @@ int msm_gem_get_iova(struct drm_gem_object *obj,
 		struct page **pages;
 
 		vma = add_vma(obj, aspace);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
+			goto unlock;
+		}
 
 		pages = get_pages(obj);
 		if (IS_ERR(pages)) {
@@ -405,7 +407,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj,
 
 fail:
 	del_vma(vma);
-
+unlock:
 	mutex_unlock(&msm_obj->lock);
 	return ret;
 }

From af1f5f12c21bd9dc08f578d86adc192eec4eb28a Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 3 Jul 2017 13:15:57 -0400
Subject: [PATCH 074/281] drm/msm/mdp5: fix unclocked register access in
 _cursor_set()

Fixes an insta-reboot when screen-blanking kicks in, due to cursor
updates without clocks enabled.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 62a0fb377e7a..735a87a699fa 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -753,6 +753,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	if (!handle) {
 		DBG("Cursor off");
 		cursor_enable = false;
+		mdp5_enable(mdp5_kms);
 		goto set_cursor;
 	}
 
@@ -776,6 +777,8 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 
 	get_roi(crtc, &roi_w, &roi_h);
 
+	mdp5_enable(mdp5_kms);
+
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride);
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm),
 			MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888));
@@ -804,6 +807,7 @@ set_cursor:
 	crtc_flush(crtc, flush_mask);
 
 end:
+	mdp5_disable(mdp5_kms);
 	if (old_bo) {
 		drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo);
 		/* enable vblank to complete cursor work: */
@@ -836,6 +840,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 	get_roi(crtc, &roi_w, &roi_h);
 
+	mdp5_enable(mdp5_kms);
+
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm),
 			MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) |
@@ -847,6 +853,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 
 	crtc_flush(crtc, flush_mask);
 
+	mdp5_disable(mdp5_kms);
+
 	return 0;
 }
 

From d4cea38ebb4de90913085391ce4febde1a4ba9aa Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Wed, 12 Jul 2017 15:09:55 +0530
Subject: [PATCH 075/281] drm/msm/dsi: Calculate link clock rates with updated
 dsi->lanes

After the commit mentioned below, we start computing the byte and pixel
clocks (dsi_calc_clk_rate) in the DSI bridge's mode_set() op. The
calculation involves the number of DSI lanes being used by the
downstream bridge/panel.

If the downstream bridge/panel tries to change the number of DSI lanes
(as done in the ADV7533 driver) in its mode_set() op, then our DSI
host driver will not have the correct number of lanes when computing
byte/pixel clocks.

Fix this by delaying the clock rate calculation in the DSI bridge
enable path. In particular, compute the clock rates in
msm_dsi_host_get_phy_clk_req().

This fixes the DSI host error interrupts seen when we try to switch
between modes that require different number of lanes (4 to 3 lanes, or
vice versa) on db410c. The error interrupts occur since the byte/pixel
clock rates aren't according to what the DSI video mode timing engine
expects.

Fixes: b62aa70a98c5 ("drm/msm/dsi: Move PHY operations out of host")
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/dsi/dsi_host.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 9e9c5696bc03..c7b612c3d771 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -2137,6 +2137,13 @@ void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
 	struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+	int ret;
+
+	ret = dsi_calc_clk_rate(msm_host);
+	if (ret) {
+		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
+		return;
+	}
 
 	clk_req->bitclk_rate = msm_host->byte_clk_rate * 8;
 	clk_req->escclk_rate = msm_host->esc_clk_rate;
@@ -2280,7 +2287,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 					struct drm_display_mode *mode)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
-	int ret;
 
 	if (msm_host->mode) {
 		drm_mode_destroy(msm_host->dev, msm_host->mode);
@@ -2293,12 +2299,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 		return -ENOMEM;
 	}
 
-	ret = dsi_calc_clk_rate(msm_host);
-	if (ret) {
-		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
-		return ret;
-	}
-
 	return 0;
 }
 

From d9535cb7b7603aeb549c697ecdf92024e4d0a650 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 28 Jul 2017 17:30:02 -0500
Subject: [PATCH 076/281] ptp: introduce ptp auxiliary worker

Many PTP drivers required to perform some asynchronous or periodic work,
like periodically handling PHC counter overflow or handle delayed timestamp
for RX/TX network packets. In most of the cases, such work is implemented
using workqueues. Unfortunately, Kernel workqueues might introduce
significant delay in work scheduling under high system load and on -RT,
which could cause misbehavior of PTP drivers due to internal counter
overflow, for example, and there is no way to tune its execution policy and
priority manuallly.

Hence, The kthread_worker can be used insted of workqueues, as it create
separte named kthread for each worker and its its execution policy and
priority can be configured using chrt tool.

This prblem was reported for two drivers TI CPSW CPTS and dp83640, so
instead of modifying each of these driver it was proposed to add PTP
auxiliary worker to the PHC subsystem.

The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker
and kthread_delayed_work and introduces two new PHC subsystem APIs:

- long (*do_aux_work)(struct ptp_clock_info *ptp) callback in
ptp_clock_info structure, which driver should assign if it require to
perform asynchronous or periodic work. Driver should return the delay of
the PTP next auxiliary work scheduling time (>=0) or negative value in case
further scheduling is not required.

- int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which
allows schedule PTP auxiliary work.

The name of kthread_worker thread corresponds PTP PHC device name "ptp%d".

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 42 ++++++++++++++++++++++++++++++++
 drivers/ptp/ptp_private.h        |  3 +++
 include/linux/ptp_clock_kernel.h | 20 +++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index b77435783ef3..7eacc1c4b3b1 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <uapi/linux/sched/types.h>
 
 #include "ptp_private.h"
 
@@ -184,6 +185,19 @@ static void delete_ptp_clock(struct posix_clock *pc)
 	kfree(ptp);
 }
 
+static void ptp_aux_kworker(struct kthread_work *work)
+{
+	struct ptp_clock *ptp = container_of(work, struct ptp_clock,
+					     aux_work.work);
+	struct ptp_clock_info *info = ptp->info;
+	long delay;
+
+	delay = info->do_aux_work(info);
+
+	if (delay >= 0)
+		kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay);
+}
+
 /* public interface */
 
 struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
@@ -217,6 +231,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	mutex_init(&ptp->pincfg_mux);
 	init_waitqueue_head(&ptp->tsev_wq);
 
+	if (ptp->info->do_aux_work) {
+		char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index);
+
+		kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
+		ptp->kworker = kthread_create_worker(0, worker_name ?
+						     worker_name : info->name);
+		kfree(worker_name);
+		if (IS_ERR(ptp->kworker)) {
+			err = PTR_ERR(ptp->kworker);
+			pr_err("failed to create ptp aux_worker %d\n", err);
+			goto kworker_err;
+		}
+	}
+
 	err = ptp_populate_pin_groups(ptp);
 	if (err)
 		goto no_pin_groups;
@@ -259,6 +287,9 @@ no_pps:
 no_device:
 	ptp_cleanup_pin_groups(ptp);
 no_pin_groups:
+	if (ptp->kworker)
+		kthread_destroy_worker(ptp->kworker);
+kworker_err:
 	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	ida_simple_remove(&ptp_clocks_map, index);
@@ -274,6 +305,11 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
 	ptp->defunct = 1;
 	wake_up_interruptible(&ptp->tsev_wq);
 
+	if (ptp->kworker) {
+		kthread_cancel_delayed_work_sync(&ptp->aux_work);
+		kthread_destroy_worker(ptp->kworker);
+	}
+
 	/* Release the clock's resources. */
 	if (ptp->pps_source)
 		pps_unregister_source(ptp->pps_source);
@@ -339,6 +375,12 @@ int ptp_find_pin(struct ptp_clock *ptp,
 }
 EXPORT_SYMBOL(ptp_find_pin);
 
+int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
+{
+	return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay);
+}
+EXPORT_SYMBOL(ptp_schedule_worker);
+
 /* module operations */
 
 static void __exit ptp_exit(void)
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index d95888974d0c..b86f1bfecd6f 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -22,6 +22,7 @@
 
 #include <linux/cdev.h>
 #include <linux/device.h>
+#include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/posix-clock.h>
 #include <linux/ptp_clock.h>
@@ -56,6 +57,8 @@ struct ptp_clock {
 	struct attribute_group pin_attr_group;
 	/* 1st entry is a pointer to the real group, 2nd is NULL terminator */
 	const struct attribute_group *pin_attr_groups[2];
+	struct kthread_worker *kworker;
+	struct kthread_delayed_work aux_work;
 };
 
 /*
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index a026bfd089db..51349d124ee5 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -99,6 +99,11 @@ struct system_device_crosststamp;
  *            parameter func: the desired function to use.
  *            parameter chan: the function channel index to use.
  *
+ * @do_work:  Request driver to perform auxiliary (periodic) operations
+ *	      Driver should return delay of the next auxiliary work scheduling
+ *	      time (>=0) or negative value in case further scheduling
+ *	      is not required.
+ *
  * Drivers should embed their ptp_clock_info within a private
  * structure, obtaining a reference to it using container_of().
  *
@@ -126,6 +131,7 @@ struct ptp_clock_info {
 		      struct ptp_clock_request *request, int on);
 	int (*verify)(struct ptp_clock_info *ptp, unsigned int pin,
 		      enum ptp_pin_function func, unsigned int chan);
+	long (*do_aux_work)(struct ptp_clock_info *ptp);
 };
 
 struct ptp_clock;
@@ -211,6 +217,16 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
 int ptp_find_pin(struct ptp_clock *ptp,
 		 enum ptp_pin_function func, unsigned int chan);
 
+/**
+ * ptp_schedule_worker() - schedule ptp auxiliary work
+ *
+ * @ptp:    The clock obtained from ptp_clock_register().
+ * @delay:  number of jiffies to wait before queuing
+ *          See kthread_queue_delayed_work() for more info.
+ */
+
+int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay);
+
 #else
 static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 						   struct device *parent)
@@ -225,6 +241,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp)
 static inline int ptp_find_pin(struct ptp_clock *ptp,
 			       enum ptp_pin_function func, unsigned int chan)
 { return -1; }
+static inline int ptp_schedule_worker(struct ptp_clock *ptp,
+				      unsigned long delay)
+{ return -EOPNOTSUPP; }
+
 #endif
 
 #endif

From 999f129289ab1599e7cdc08804a9b5ec4e1418f4 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 28 Jul 2017 17:30:03 -0500
Subject: [PATCH 077/281] net: ethernet: ti: cpts: convert to use ptp auxiliary
 worker

There could be significant delay in CPTS work schedule under high system
load and on -RT which could cause CPTS misbehavior due to internal counter
overflow. Usage of own kthread_worker allows to avoid such kind of issues
and makes it possible to tune priority of CPTS kthread_worker thread on -RT
(thread name "cpts").

Hence, the CPTS driver is converted to use PTP auxiliary worker as PHC
subsystem implements such functionality in a generic way now.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 27 +++++++++++++--------------
 drivers/net/ethernet/ti/cpts.h |  1 -
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 32279d21c836..3ed438ef9e1e 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -224,6 +224,17 @@ static int cpts_ptp_enable(struct ptp_clock_info *ptp,
 	return -EOPNOTSUPP;
 }
 
+static long cpts_overflow_check(struct ptp_clock_info *ptp)
+{
+	struct cpts *cpts = container_of(ptp, struct cpts, info);
+	unsigned long delay = cpts->ov_check_period;
+	struct timespec64 ts;
+
+	cpts_ptp_gettime(&cpts->info, &ts);
+	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
+	return (long)delay;
+}
+
 static struct ptp_clock_info cpts_info = {
 	.owner		= THIS_MODULE,
 	.name		= "CTPS timer",
@@ -236,18 +247,9 @@ static struct ptp_clock_info cpts_info = {
 	.gettime64	= cpts_ptp_gettime,
 	.settime64	= cpts_ptp_settime,
 	.enable		= cpts_ptp_enable,
+	.do_aux_work	= cpts_overflow_check,
 };
 
-static void cpts_overflow_check(struct work_struct *work)
-{
-	struct timespec64 ts;
-	struct cpts *cpts = container_of(work, struct cpts, overflow_work.work);
-
-	cpts_ptp_gettime(&cpts->info, &ts);
-	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
-}
-
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
 		      u16 ts_seqid, u8 ts_msgtype)
 {
@@ -378,7 +380,7 @@ int cpts_register(struct cpts *cpts)
 	}
 	cpts->phc_index = ptp_clock_index(cpts->clock);
 
-	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
+	ptp_schedule_worker(cpts->clock, cpts->ov_check_period);
 	return 0;
 
 err_ptp:
@@ -392,8 +394,6 @@ void cpts_unregister(struct cpts *cpts)
 	if (WARN_ON(!cpts->clock))
 		return;
 
-	cancel_delayed_work_sync(&cpts->overflow_work);
-
 	ptp_clock_unregister(cpts->clock);
 	cpts->clock = NULL;
 
@@ -476,7 +476,6 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	cpts->dev = dev;
 	cpts->reg = (struct cpsw_cpts __iomem *)regs;
 	spin_lock_init(&cpts->lock);
-	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
 
 	ret = cpts_of_parse(cpts, node);
 	if (ret)
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 01ea82ba9cdc..586edd9c7de0 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -119,7 +119,6 @@ struct cpts {
 	u32 cc_mult; /* for the nominal frequency */
 	struct cyclecounter cc;
 	struct timecounter tc;
-	struct delayed_work overflow_work;
 	int phc_index;
 	struct clk *refclk;
 	struct list_head events;

From 0d5f54fec0a18eea5e4ac005646fd2bc2118636c Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 28 Jul 2017 17:30:04 -0500
Subject: [PATCH 078/281] net: ethernet: ti: cpts: fix tx timestamping timeout

With the low speed Ethernet connection CPDMA notification about packet
processing can be received before CPTS TX timestamp event, which is set
when packet actually left CPSW while cpdma notification is sent when packet
pushed in CPSW fifo.  As result, when connection is slow and CPU is fast
enough TX timestamping is not working properly.

Fix it, by introducing TX SKB queue to store PTP SKBs for which Ethernet
Transmit Event hasn't been received yet and then re-check this queue
with new Ethernet Transmit Events by scheduling CPTS overflow
work more often (every 1 jiffies) until TX SKB queue is not empty.

Side effect of this change is:
 - User space tools require to take into account possible delay in TX
timestamp processing (for example ptp4l works with tx_timestamp_timeout=400
under net traffic and tx_timestamp_timeout=25 in idle).

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 84 +++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/ti/cpts.h |  1 +
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3ed438ef9e1e..95a0076773b3 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,9 +31,18 @@
 
 #include "cpts.h"
 
+#define CPTS_SKB_TX_WORK_TIMEOUT 1 /* jiffies */
+
+struct cpts_skb_cb_data {
+	unsigned long tmo;
+};
+
 #define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
+static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
+		      u16 ts_seqid, u8 ts_msgtype);
+
 static int event_expired(struct cpts_event *event)
 {
 	return time_after(jiffies, event->tmo);
@@ -77,6 +86,47 @@ static int cpts_purge_events(struct cpts *cpts)
 	return removed ? 0 : -1;
 }
 
+static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
+{
+	struct sk_buff *skb, *tmp;
+	u16 seqid;
+	u8 mtype;
+	bool found = false;
+
+	mtype = (event->high >> MESSAGE_TYPE_SHIFT) & MESSAGE_TYPE_MASK;
+	seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK;
+
+	/* no need to grab txq.lock as access is always done under cpts->lock */
+	skb_queue_walk_safe(&cpts->txq, skb, tmp) {
+		struct skb_shared_hwtstamps ssh;
+		unsigned int class = ptp_classify_raw(skb);
+		struct cpts_skb_cb_data *skb_cb =
+					(struct cpts_skb_cb_data *)skb->cb;
+
+		if (cpts_match(skb, class, seqid, mtype)) {
+			u64 ns = timecounter_cyc2time(&cpts->tc, event->low);
+
+			memset(&ssh, 0, sizeof(ssh));
+			ssh.hwtstamp = ns_to_ktime(ns);
+			skb_tstamp_tx(skb, &ssh);
+			found = true;
+			__skb_unlink(skb, &cpts->txq);
+			dev_consume_skb_any(skb);
+			dev_dbg(cpts->dev, "match tx timestamp mtype %u seqid %04x\n",
+				mtype, seqid);
+		} else if (time_after(jiffies, skb_cb->tmo)) {
+			/* timeout any expired skbs over 1s */
+			dev_dbg(cpts->dev,
+				"expiring tx timestamp mtype %u seqid %04x\n",
+				mtype, seqid);
+			__skb_unlink(skb, &cpts->txq);
+			dev_consume_skb_any(skb);
+		}
+	}
+
+	return found;
+}
+
 /*
  * Returns zero if matching event type was found.
  */
@@ -101,9 +151,15 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		event->low = lo;
 		type = event_type(event);
 		switch (type) {
+		case CPTS_EV_TX:
+			if (cpts_match_tx_ts(cpts, event)) {
+				/* if the new event matches an existing skb,
+				 * then don't queue it
+				 */
+				break;
+			}
 		case CPTS_EV_PUSH:
 		case CPTS_EV_RX:
-		case CPTS_EV_TX:
 			list_del_init(&event->list);
 			list_add_tail(&event->list, &cpts->events);
 			break;
@@ -229,8 +285,15 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
 	unsigned long delay = cpts->ov_check_period;
 	struct timespec64 ts;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	ts = ns_to_timespec64(timecounter_read(&cpts->tc));
+
+	if (!skb_queue_empty(&cpts->txq))
+		delay = CPTS_SKB_TX_WORK_TIMEOUT;
+	spin_unlock_irqrestore(&cpts->lock, flags);
 
-	cpts_ptp_gettime(&cpts->info, &ts);
 	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
 	return (long)delay;
 }
@@ -319,6 +382,19 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
 			break;
 		}
 	}
+
+	if (ev_type == CPTS_EV_TX && !ns) {
+		struct cpts_skb_cb_data *skb_cb =
+				(struct cpts_skb_cb_data *)skb->cb;
+		/* Not found, add frame to queue for processing later.
+		 * The periodic FIFO check will handle this.
+		 */
+		skb_get(skb);
+		/* get the timestamp for timeouts */
+		skb_cb->tmo = jiffies + msecs_to_jiffies(100);
+		__skb_queue_tail(&cpts->txq, skb);
+		ptp_schedule_worker(cpts->clock, 0);
+	}
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	return ns;
@@ -360,6 +436,7 @@ int cpts_register(struct cpts *cpts)
 {
 	int err, i;
 
+	skb_queue_head_init(&cpts->txq);
 	INIT_LIST_HEAD(&cpts->events);
 	INIT_LIST_HEAD(&cpts->pool);
 	for (i = 0; i < CPTS_MAX_EVENTS; i++)
@@ -400,6 +477,9 @@ void cpts_unregister(struct cpts *cpts)
 	cpts_write32(cpts, 0, int_enable);
 	cpts_write32(cpts, 0, control);
 
+	/* Drop all packet */
+	skb_queue_purge(&cpts->txq);
+
 	clk_disable(cpts->refclk);
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 586edd9c7de0..73d73faf0f38 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -125,6 +125,7 @@ struct cpts {
 	struct list_head pool;
 	struct cpts_event pool_data[CPTS_MAX_EVENTS];
 	unsigned long ov_check_period;
+	struct sk_buff_head txq;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);

From a93439cce2c1d31469e8245c504dbb6d1bed8749 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 28 Jul 2017 17:30:05 -0500
Subject: [PATCH 079/281] net: ethernet: ti: cpts: fix fifo read in
 cpts_find_ts

Now the call chain
 cpts_find_ts()
  |- cpts_fifo_read(cpts, CPTS_EV_PUSH)

will stop reading CPTS FIFO if PUSH event is found. But this is not
expected and CPTS FIFI should be completely drained here. This is most
probably copy-paste error and it has no negative impact as CPTS_EV_PUSH
should not be present in FIFO without TS_PUSH request and
cpts_systim_read() and cpts_find_ts() synchronized by spin_lock.

Correct above by calling cpts_fifo_read() with -1 parameter, so it will
read all CPTS event from FIFO.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 95a0076773b3..c2121d214f08 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -364,7 +364,7 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
 		return 0;
 
 	spin_lock_irqsave(&cpts->lock, flags);
-	cpts_fifo_read(cpts, CPTS_EV_PUSH);
+	cpts_fifo_read(cpts, -1);
 	list_for_each_safe(this, next, &cpts->events) {
 		event = list_entry(this, struct cpts_event, list);
 		if (event_expired(event)) {

From 40413955ee265a5e42f710940ec78f5450d49149 Mon Sep 17 00:00:00 2001
From: "yujuan.qi" <yujuan.qi@mediatek.com>
Date: Mon, 31 Jul 2017 11:23:01 +0800
Subject: [PATCH 080/281] Cipso: cipso_v4_optptr enter infinite loop

in for(),if((optlen > 0) && (optptr[1] == 0)), enter infinite loop.

Test: receive a packet which the ip length > 20 and the first byte of ip option is 0, produce this issue

Signed-off-by: yujuan.qi <yujuan.qi@mediatek.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c4c6e1969ed0..2ae8f54cb321 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
 	int taglen;
 
 	for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
-		if (optptr[0] == IPOPT_CIPSO)
+		switch (optptr[0]) {
+		case IPOPT_CIPSO:
 			return optptr;
-		taglen = optptr[1];
+		case IPOPT_END:
+			return NULL;
+		case IPOPT_NOOP:
+			taglen = 1;
+			break;
+		default:
+			taglen = optptr[1];
+		}
 		optlen -= taglen;
 		optptr += taglen;
 	}

From b3949a9a3e09f87e0371e80a2bef6ec0d48b575d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 30 Jul 2017 14:42:36 +0200
Subject: [PATCH 081/281] drm/msm: fix WARN_ON in add_vma() with no iommu

While I was testing the upcoming adv7533 CEC support with my Dragonboard c410
I encountered this warning several times during boot:

[    4.408309] WARNING: CPU: 3 PID: 1347 at drivers/gpu/drm/msm/msm_gem.c:312 add_vma+0x78/0x88 [msm]
[    4.412951] Modules linked in: snd_soc_hdmi_codec adv7511 cec qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm
[    4.421728] CPU: 3 PID: 1347 Comm: kworker/3:3 Not tainted 4.13.0-rc1-dragonboard #111
[    4.433090] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[    4.441081] Workqueue: events deferred_probe_work_func
[    4.447929] task: ffff800031243600 task.stack: ffff800003394000
[    4.453023] PC is at add_vma+0x78/0x88 [msm]
[    4.458823] LR is at _msm_gem_new+0xd4/0x188 [msm]
[    4.463207] pc : [<ffff000000ac01f8>] lr : [<ffff000000ac06b4>] pstate: 40000145
[    4.467811] sp : ffff8000033978a0
[    4.475357] x29: ffff8000033978a0 x28: ffff8000031dea18
[    4.478572] x27: ffff800003933a00 x26: ffff800003b39800
[    4.483953] x25: ffff8000338ff800 x24: 0000000000000001
[    4.489249] x23: 0000000000000000 x22: ffff800003b39800
[    4.494544] x21: ffff8000338ff800 x20: 0000000000000000
[    4.499839] x19: ffff800003932600 x18: 0000000000000001
[    4.505135] x17: 0000ffff8969e9e0 x16: ffff7e00000ce7a0
[    4.510429] x15: ffffffffffffffff x14: ffff8000833977ef
[    4.515724] x13: ffff8000033977f3 x12: 0000000000000038
[    4.521020] x11: 0101010101010101 x10: ffffff7f7fff7f7f
[    4.526315] x9 : 0000000000000000 x8 : ffff800003932800
[    4.531633] x7 : 0000000000000000 x6 : 000000000000003f
[    4.531644] x5 : 0000000000000040 x4 : 0000000000000000
[    4.531650] x3 : ffff800031243600 x2 : 0000000000000000
[    4.531655] x1 : 0000000000000000 x0 : 0000000000000000
[    4.531670] Call trace:
[    4.531676] Exception stack(0xffff8000033976c0 to 0xffff8000033977f0)
[    4.531683] 76c0: ffff800003932600 0001000000000000 ffff8000033978a0 ffff000000ac01f8
[    4.531688] 76e0: 0000000000000140 0000000000000000 ffff800003932550 ffff800003397780
[    4.531694] 7700: ffff800003397730 ffff000008261ce8 0000000000000000 ffff8000031d2f80
[    4.531699] 7720: ffff800003397800 ffff0000081d671c 0000000000000140 0000000000000000
[    4.531705] 7740: ffff000000ac04c0 0000000000004003 ffff800003397908 00000000014080c0
[    4.531710] 7760: 0000000000000000 ffff800003b39800 0000000000000000 0000000000000000
[    4.531716] 7780: 0000000000000000 ffff800031243600 0000000000000000 0000000000000040
[    4.531721] 77a0: 000000000000003f 0000000000000000 ffff800003932800 0000000000000000
[    4.531726] 77c0: ffffff7f7fff7f7f 0101010101010101 0000000000000038 ffff8000033977f3
[    4.531730] 77e0: ffff8000833977ef ffffffffffffffff
[    4.531881] [<ffff000000ac01f8>] add_vma+0x78/0x88 [msm]
[    4.532011] [<ffff000000ac06b4>] _msm_gem_new+0xd4/0x188 [msm]
[    4.532134] [<ffff000000ac1900>] msm_gem_new+0x10/0x18 [msm]
[    4.532260] [<ffff000000acb274>] msm_dsi_host_modeset_init+0x17c/0x268 [msm]
[    4.532384] [<ffff000000ac9024>] msm_dsi_modeset_init+0x34/0x1b8 [msm]
[    4.532504] [<ffff000000ab6168>] modeset_init+0x408/0x488 [msm]
[    4.532623] [<ffff000000ab6c4c>] mdp5_kms_init+0x2b4/0x338 [msm]
[    4.532745] [<ffff000000abeff8>] msm_drm_bind+0x218/0x4e8 [msm]
[    4.532755] [<ffff00000855d744>] try_to_bring_up_master+0x1f4/0x318
[    4.532762] [<ffff00000855d900>] component_add+0x98/0x180
[    4.532887] [<ffff000000ac8da0>] dsi_dev_probe+0x18/0x28 [msm]
[    4.532895] [<ffff000008565fe8>] platform_drv_probe+0x58/0xc0
[    4.532901] [<ffff00000856410c>] driver_probe_device+0x324/0x458
[    4.532907] [<ffff00000856440c>] __device_attach_driver+0xac/0x170
[    4.532913] [<ffff000008561ef4>] bus_for_each_drv+0x4c/0x98
[    4.532918] [<ffff000008563c38>] __device_attach+0xc0/0x160
[    4.532924] [<ffff000008564530>] device_initial_probe+0x10/0x18
[    4.532929] [<ffff000008562f84>] bus_probe_device+0x94/0xa0
[    4.532934] [<ffff0000085635d4>] deferred_probe_work_func+0x8c/0xe8
[    4.532941] [<ffff0000080d79bc>] process_one_work+0x1d4/0x330
[    4.532946] [<ffff0000080d7b60>] worker_thread+0x48/0x468
[    4.532952] [<ffff0000080ddae4>] kthread+0x12c/0x130
[    4.532958] [<ffff000008082f10>] ret_from_fork+0x10/0x40
[    4.532962] ---[ end trace b1ac6888ec40b0bb ]---

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 065d933df2c3..a0c60e738db8 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -930,8 +930,12 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 	if (use_vram) {
 		struct msm_gem_vma *vma;
 		struct page **pages;
+		struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+		mutex_lock(&msm_obj->lock);
 
 		vma = add_vma(obj, NULL);
+		mutex_unlock(&msm_obj->lock);
 		if (IS_ERR(vma)) {
 			ret = PTR_ERR(vma);
 			goto fail;

From 79687057c2880d871451f107548187e4853c38e6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 30 Jul 2017 14:46:56 +0200
Subject: [PATCH 082/281] drm/msm: NULL pointer dereference in
 drivers/gpu/drm/msm/msm_gem_vma.c

While I was testing the upcoming adv7533 CEC support with my Dragonboard c410
I encountered this NULL pointer dereference:

[   17.912822] Unable to handle kernel NULL pointer dereference at virtual address 000000e8
[   17.917191] user pgtable: 4k pages, 48-bit VAs, pgd = ffff800030e9f000
[   17.925249] [00000000000000e8] *pgd=00000000b0daf003, *pud=0000000000000000
[   17.931650] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[   17.938395] Modules linked in: btqcomsmd btqca arc4 wcn36xx mac80211 bluetooth cfg80211 ecdh_generic r8152 snd_soc_hdmi_codec adv7511 cec
qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm
[   17.943967] CPU: 0 PID: 1684 Comm: Xorg Tainted: G        W       4.13.0-rc1-dragonboard #111
[   17.962005] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[   17.970685] task: ffff800031236c00 task.stack: ffff800033fbc000
[   17.977582] PC is at msm_gem_unmap_vma+0x20/0x80 [msm]
[   17.983213] LR is at put_iova+0x60/0xb8 [msm]
[   17.988303] pc : [<ffff000000ac2d58>] lr : [<ffff000000ac07c8>] pstate: 20000145
[   17.992733] sp : ffff800033fbfb30
[   18.000193] x29: ffff800033fbfb30 x28: ffff800030b5f000
[   18.003407] x27: 00000000000000b4 x26: ffff0000009f8cd8
[   18.008789] x25: 0000000000000004 x24: dead000000000100
[   18.014085] x23: dead000000000200 x22: ffff800030b5fd40
[   18.019379] x21: ffff800030b5fc00 x20: 0000000000000000
[   18.024675] x19: ffff80003082bf00 x18: 0000000000000000
[   18.029970] x17: 0000ffffb3347e70 x16: ffff000008207638
[   18.035265] x15: 0000000000000053 x14: 0000000000000000
[   18.040560] x13: 0000000000000038 x12: 0101010101010101
[   18.045855] x11: 7f7f7f7f7f7f7f7f x10: 0000000000000040
[   18.051150] x9 : ffff800030b5f038 x8 : ffff800031657b50
[   18.056446] x7 : ffff800031657b78 x6 : 0000000000000000
[   18.061740] x5 : 0000000000000000 x4 : 00000000b5c01000
[   18.067036] x3 : 0000000000000000 x2 : ffff8000337bf300
[   18.072330] x1 : ffff80003082bf00 x0 : 0000000000000000
[   18.077629] Process Xorg (pid: 1684, stack limit = 0xffff800033fbc000)
[   18.082925] Stack: (0xffff800033fbfb30 to 0xffff800033fc0000)
[   18.089262] fb20:                                   ffff800033fbfb60 ffff000000ac07c8
[   18.095081] fb40: ffff80003082bf00 ffff800030b5fc90 ffff800030b5fc00 ffff000000abf4a0
[   18.102893] fb60: ffff800033fbfba0 ffff000000ac16b0 ffff800030b5fc00 ffff8000338ff870
[   18.110706] fb80: ffff8000338ff800 ffff800030b5fc00 ffff800030b5fda8 ffff800033fbfd80
[   18.118518] fba0: ffff800033fbfbe0 ffff0000009d4244 ffff800030b5fc00 ffff800030b5f038
[   18.126332] fbc0: ffff800033fbfbd0 ffff800030b5fc00 ffff800030b5f038 ffff0000009d4840
[   18.134144] fbe0: ffff800033fbfbf0 ffff0000009d4858 ffff800033fbfc10 ffff0000009d48e4
[   18.141955] fc00: ffff800030b5fc00 ffff8000338ffd98 ffff800033fbfc30 ffff0000009d49a4
[   18.149768] fc20: ffff800030b5fc00 ffff800030b5f000 ffff800033fbfc60 ffff0000009d4a4c
[   18.157581] fc40: ffff800030b5f050 ffff800030b5f000 0000000000000001 ffff800030b5fc00
[   18.165394] fc60: ffff800033fbfca0 ffff0000009d4ab0 0000000000000018 ffff800030b5f000
[   18.173206] fc80: ffff0000009efd28 ffff800033fbfd80 ffff8000338ff800 ffff0000009d56a8
[   18.181019] fca0: ffff800033fbfcb0 ffff0000009efd54 ffff800033fbfcc0 ffff0000009d56c8
[   18.188831] fcc0: ffff800033fbfd00 ffff0000009d58e0 ffff0000009fa6e0 00000000c00464b4
[   18.196643] fce0: 0000000000000004 ffff80003082b400 0000ffffea1f0e00 0000000000000000
[   18.204456] fd00: ffff800033fbfe00 ffff000008206f0c ffff80000335caf8 ffff80003082b400
[   18.212269] fd20: 0000ffffea1f0e00 ffff80003082b400 00000000c00464b4 0000ffffea1f0e00
[   18.220081] fd40: 0000000000000124 000000000000001d ffff0000089d2000 ffff800031236c00
[   18.227894] fd60: ffff800033fbfd80 0000000000000004 ffff0000009efd28 ffff800033fbfd80
[   18.235706] fd80: 0000000100000001 0000008000000001 0000001800000020 0000000000000001
[   18.243518] fda0: 0000000100000000 0000000100000001 0000ffff00000000 0000ffff00000000
[   18.251331] fdc0: 0000000000000124 0000000000000038 ffff0000089d2000 ffff800031236c00
[   18.259144] fde0: ffff800033fbfe40 ffff000008214124 ffff800033fbfe30 ffff000008203290
[   18.266956] fe00: ffff800033fbfe80 ffff0000082076b4 0000000000000000 ffff800030d8a000
[   18.274768] fe20: ffff80003082b400 0000000000000016 ffff800033fbfe50 ffff0000081f0488
[   18.282581] fe40: ffff800033fbfe80 ffff000008207678 0000000000000000 ffff80003082b400
[   18.290393] fe60: ffff800033fbfe70 ffff0000082138b0 ffff800033fbfe80 ffff000008207658
[   18.298207] fe80: 0000000000000000 ffff000008082f84 0000000000000000 0000800034a16000
[   18.306017] fea0: ffffffffffffffff 0000ffffb3347e7c 0000000000000000 0000000000000015
[   18.313832] fec0: 0000000000000016 00000000c00464b4 0000ffffea1f0e00 0000000000000001
[   18.321643] fee0: 0000000000000020 0000000000000080 0000000000000001 0000000000000000
[   18.329456] ff00: 000000000000001d 000000012692c5b0 0101010101010101 7f7f7f7f7f7f7f7f
[   18.337269] ff20: 0101010101010101 0000000000000038 0000000000000000 0000000000000053
[   18.345082] ff40: 0000ffffb368b2b8 0000ffffb3347e70 0000000000000000 0000ffffb3847000
[   18.352894] ff60: 0000ffffea1f0e00 00000000c00464b4 0000000000000016 0000ffffea1f0edc
[   18.360705] ff80: 000000012692ad20 0000000000000003 00000001214282e4 0000000121428388
[   18.368518] ffa0: 0000000000000000 0000ffffea1f0da0 0000ffffb367185c 0000ffffea1f0da0
[   18.376332] ffc0: 0000ffffb3347e7c 0000000000000000 0000000000000016 000000000000001d
[   18.384142] ffe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[   18.391953] Call trace:
[   18.399760] Exception stack(0xffff800033fbf950 to 0xffff800033fbfa80)
[   18.402023] f940:                                   ffff80003082bf00 0001000000000000
[   18.408622] f960: ffff800033fbfb30 ffff000000ac2d58 0000000020000145 ffff8000338ffa78
[   18.416435] f980: 0000000000000000 0000000000000000 ffff800033fbf9e0 ffff0000089afcf0
[   18.424248] f9a0: ffff80000348f230 ffff8000338ffa78 0000000000000000 0000000000000000
[   18.432060] f9c0: ffff8000338ffaa8 0000000000000001 ffff800033fbfb80 ffff0000009e8f38
[   18.439872] f9e0: ffff800033fbfa10 ffff0000089a9ff8 0000000000000027 ffff80003082b918
[   18.447684] fa00: 0000000000000000 ffff80003082bf00 ffff8000337bf300 0000000000000000
[   18.455497] fa20: 00000000b5c01000 0000000000000000 0000000000000000 ffff800031657b78
[   18.463310] fa40: ffff800031657b50 ffff800030b5f038 0000000000000040 7f7f7f7f7f7f7f7f
[   18.471122] fa60: 0101010101010101 0000000000000038 0000000000000000 0000000000000053
[   18.479062] [<ffff000000ac2d58>] msm_gem_unmap_vma+0x20/0x80 [msm]
[   18.486862] [<ffff000000ac07c8>] put_iova+0x60/0xb8 [msm]
[   18.492938] [<ffff000000ac16b0>] msm_gem_free_object+0x60/0x198 [msm]
[   18.498432] [<ffff0000009d4244>] drm_gem_object_free+0x1c/0x58 [drm]
[   18.504854] [<ffff0000009d4858>] drm_gem_object_put_unlocked+0x90/0xa0 [drm]
[   18.511273] [<ffff0000009d48e4>] drm_gem_object_handle_put_unlocked+0x64/0xd0 [drm]
[   18.518300] [<ffff0000009d49a4>] drm_gem_object_release_handle+0x54/0x98 [drm]
[   18.525679] [<ffff0000009d4a4c>] drm_gem_handle_delete+0x64/0xb8 [drm]
[   18.532968] [<ffff0000009d4ab0>] drm_gem_dumb_destroy+0x10/0x18 [drm]
[   18.539479] [<ffff0000009efd54>] drm_mode_destroy_dumb_ioctl+0x2c/0x40 [drm]
[   18.545992] [<ffff0000009d56c8>] drm_ioctl_kernel+0x68/0xe0 [drm]
[   18.553105] [<ffff0000009d58e0>] drm_ioctl+0x178/0x3b0 [drm]
[   18.558970] [<ffff000008206f0c>] do_vfs_ioctl+0xa4/0x7d0
[   18.564694] [<ffff0000082076b4>] SyS_ioctl+0x7c/0x98
[   18.569992] [<ffff000008082f84>] el0_svc_naked+0x38/0x3c
[   18.574941] Code: a90153f3 aa0003f4 f90013f5 aa0103f3 (f9407400)
[   18.580502] ---[ end trace b1ac6888ec40b0be ]---

It turns out that the aspace argument in msm_gem_unmap_vma() is NULL.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
[Note: this case gets hit with !IOMMU config]
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_vma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index c36321bc8714..d34e331554f3 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -42,7 +42,7 @@ void
 msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
 		struct msm_gem_vma *vma, struct sg_table *sgt)
 {
-	if (!vma->iova)
+	if (!aspace || !vma->iova)
 		return;
 
 	if (aspace->mmu) {

From b0e77fd87cf302351e537881c8daf8d1c69cf541 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:16:59 +0530
Subject: [PATCH 083/281] drm/msm/mdp5: Fix typo in encoder_enable path

The mdp5_cmd_encoder_disable is accidentally called in the encoder enable
path. We've not seen any problems since we haven't tested with command
mode panels in a while. Fix the copy-paste error.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 97f3294fbfc6..70bef51245af 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -299,7 +299,7 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder)
 	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
-		mdp5_cmd_encoder_disable(encoder);
+		mdp5_cmd_encoder_enable(encoder);
 	else
 		mdp5_vid_encoder_enable(encoder);
 }

From d0538f5048fafe5633c58f25c3332f67739cdeb4 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:17:00 +0530
Subject: [PATCH 084/281] drm/msm/mdp5: Drop clock names with "_clk" suffix

We have upstream bindings (msm8916) that have the "_clk" suffix in the
clock names. The downstream bindings also require it.

We want to drop the "_clk" suffix and at the same time support existing
bindings. Update the MDP5 code with the the msm_clk_get() helper to
support both old and new clock names.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index 5d13fa5381ee..1c603aef3c59 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -502,7 +502,7 @@ static int get_clk(struct platform_device *pdev, struct clk **clkp,
 		const char *name, bool mandatory)
 {
 	struct device *dev = &pdev->dev;
-	struct clk *clk = devm_clk_get(dev, name);
+	struct clk *clk = msm_clk_get(pdev, name);
 	if (IS_ERR(clk) && mandatory) {
 		dev_err(dev, "failed to get %s (%ld)\n", name, PTR_ERR(clk));
 		return PTR_ERR(clk);
@@ -887,21 +887,21 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 	}
 
 	/* mandatory clocks: */
-	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->core_clk, "core", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk", true);
+	ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync", true);
 	if (ret)
 		goto fail;
 
 	/* optional clocks: */
-	get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk", false);
+	get_clk(pdev, &mdp5_kms->lut_clk, "lut", false);
 
 	/* we need to set a default rate before enabling.  Set a safe
 	 * rate first, then figure out hw revision, and then set a

From be73b3043bf465455d4c9b88f68e03b6447bcfb0 Mon Sep 17 00:00:00 2001
From: "K. Den" <den@klaipeden.com>
Date: Tue, 1 Aug 2017 01:05:20 +0900
Subject: [PATCH 085/281] vxlan: fix remcsum when GRO on and CHECKSUM_PARTIAL
 boundary is outer UDP

In the case that GRO is turned on and the original received packet is
CHECKSUM_PARTIAL, if the outer UDP header is exactly at the last
csum-unnecessary point, which for instance could occur if the packet
comes from another Linux guest on the same Linux host, we have to do
either remcsum_adjust or set up CHECKSUM_PARTIAL again with its
csum_start properly reset considering RCO.

However, since b7fe10e5ebac("gro: Fix remcsum offload to deal with frags
in GRO") that barrier in such case could be skipped if GRO turned on,
hence we pass over it and the inner L4 validation mistakenly reckons
it as a bad csum.

This patch makes remcsum_offload being reset at the same time of GRO
remcsum cleanup, so as to make it work in such case as before.

Fixes: b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags in GRO")
Signed-off-by: Koichiro Den <den@klaipeden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 96aa7e6cf214..e17baac70f43 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -623,6 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 
 out:
 	skb_gro_remcsum_cleanup(skb, &grc);
+	skb->remcsum_offload = 0;
 	NAPI_GRO_CB(skb)->flush |= flush;
 
 	return pp;

From 1bff8a0c1f8c236209ee369b7952751c04eaa71a Mon Sep 17 00:00:00 2001
From: "K. Den" <den@klaipeden.com>
Date: Tue, 1 Aug 2017 01:05:39 +0900
Subject: [PATCH 086/281] gue: fix remcsum when GRO on and CHECKSUM_PARTIAL
 boundary is outer UDP

In the case that GRO is turned on and the original received packet is
CHECKSUM_PARTIAL, if the outer UDP header is exactly at the last
csum-unnecessary point, which for instance could occur if the packet
comes from another Linux guest on the same Linux host, we have to do
either remcsum_adjust or set up CHECKSUM_PARTIAL again with its
csum_start properly reset considering RCO.

However, since b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags
in GRO") that barrier in such case could be skipped if GRO turned on,
hence we pass over it and the inner L4 validation mistakenly reckons
it as a bad csum.

This patch makes remcsum_offload being reset at the same time of GRO
remcsum cleanup, so as to make it work in such case as before.

Fixes: b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags in GRO")
Signed-off-by: Koichiro Den <den@klaipeden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fou.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 8e0257d01200..1540db65241a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -450,6 +450,7 @@ out_unlock:
 out:
 	NAPI_GRO_CB(skb)->flush |= flush;
 	skb_gro_remcsum_cleanup(skb, &grc);
+	skb->remcsum_offload = 0;
 
 	return pp;
 }

From 3394f5618dfe8e686a2429aad75edf7ff6540911 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:30 -0600
Subject: [PATCH 087/281] drm/msm: Remove some potentially blocked register
 ranges

The 0xf400 and 0xf800 ranges are in the RBBM_SECVID block which may
be protected from CPU access. Skip dumping them since they are minimally
useful for debugging and they aren't worth a system hang.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 49 +++++++++++++--------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index b4b54f1c24bc..1f60a9a885b4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -920,31 +920,30 @@ static const u32 a5xx_registers[] = {
 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
-	0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807,
-	0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0,
-	0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD,
-	0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, 0x0C80, 0x0C82,
-	0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2,
-	0x2180, 0x2185, 0x2580, 0x2585, 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7,
-	0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8,
-	0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 0x2100, 0x211E, 0x2140, 0x2145,
-	0x2500, 0x251E, 0x2540, 0x2545, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
-	0x0D30, 0x0D30, 0x20C0, 0x20C0, 0x24C0, 0x24C0, 0x0E40, 0x0E43,
-	0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 0x0E60, 0x0E7C, 0x0E80, 0x0E8E,
-	0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 0x0EB0, 0x0EB2, 0xE140, 0xE147,
-	0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7,
-	0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, 0xE240, 0xE268,
-	0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB,
-	0xE100, 0xE105, 0xE380, 0xE38F, 0xE3B0, 0xE3B0, 0xE400, 0xE405,
-	0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 0xE280, 0xE280, 0xE282, 0xE2A3,
-	0xE2A5, 0xE2C2, 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9,
-	0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01,
-	0xEA10, 0xEA1C, 0xEA40, 0xEA68, 0xE800, 0xE806, 0xE810, 0xE89A,
-	0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 0xE900, 0xE905, 0xEB80, 0xEB8F,
-	0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0,
-	0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, 0xA800, 0xA8FF,
-	0xAC60, 0xAC60, 0xB000, 0xB97F, 0xB9A0, 0xB9BF,
-	~0
+	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
+	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
+	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
+	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
+	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
+	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
+	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
+	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
+	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
+	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
+	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
+	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
+	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
+	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
+	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
+	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
+	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
+	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
+	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
+	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
+	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
+	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
+	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
+	0xB9A0, 0xB9BF, ~0
 };
 
 static void a5xx_dump(struct msm_gpu *gpu)

From 6e749e5971fc7c7a33d7a673fbe4944604b397cf Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:31 -0600
Subject: [PATCH 088/281] drm/msm: Allow hardware clock gating to be toggled

There are some use cases wherein we need to turn off hardware clock
gating before reading certain registers. Modify the A5XX HWCG function
to allow user to enable or disable clock gating at will.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 42 ++++++---------------------
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h |  1 +
 2 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 1f60a9a885b4..c1f8c20414f1 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -117,12 +117,10 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	gpu->funcs->flush(gpu);
 }
 
-struct a5xx_hwcg {
+static const struct {
 	u32 offset;
 	u32 value;
-};
-
-static const struct a5xx_hwcg a530_hwcg[] = {
+} a5xx_hwcg[] = {
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
@@ -217,38 +215,16 @@ static const struct a5xx_hwcg a530_hwcg[] = {
 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 };
 
-static const struct {
-	int (*test)(struct adreno_gpu *gpu);
-	const struct a5xx_hwcg *regs;
-	unsigned int count;
-} a5xx_hwcg_regs[] = {
-	{ adreno_is_a530, a530_hwcg, ARRAY_SIZE(a530_hwcg), },
-};
-
-static void _a5xx_enable_hwcg(struct msm_gpu *gpu,
-		const struct a5xx_hwcg *regs, unsigned int count)
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
 	unsigned int i;
 
-	for (i = 0; i < count; i++)
-		gpu_write(gpu, regs[i].offset, regs[i].value);
+	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
+		gpu_write(gpu, a5xx_hwcg[i].offset,
+			state ? a5xx_hwcg[i].value : 0);
 
-	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00);
-	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, 0x182);
-}
-
-static void a5xx_enable_hwcg(struct msm_gpu *gpu)
-{
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_regs); i++) {
-		if (a5xx_hwcg_regs[i].test(adreno_gpu)) {
-			_a5xx_enable_hwcg(gpu, a5xx_hwcg_regs[i].regs,
-				a5xx_hwcg_regs[i].count);
-			return;
-		}
-	}
+	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
+	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 }
 
 static int a5xx_me_init(struct msm_gpu *gpu)
@@ -545,7 +521,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 
 	/* Enable HWCG */
-	a5xx_enable_hwcg(gpu);
+	a5xx_set_hwcg(gpu, true);
 
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 6638bc85645d..d24796f3a706 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -59,5 +59,6 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
 }
 
 bool a5xx_idle(struct msm_gpu *gpu);
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
 
 #endif /* __A5XX_GPU_H__ */

From a23cb3b52fec73b22671bac65356d8c55bf37706 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:32 -0600
Subject: [PATCH 089/281] drm/msm: Turn off hardware clock gating before
 reading A5XX registers

On A5XX GPU hardware clock gating needs to be turned off before
reading certain GPU registers via AHB. Turn off HWCG before calling
adreno_show() to safely dump all the registers without a system hang.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index c1f8c20414f1..33763b005b7b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -995,7 +995,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
+
+	/*
+	 * Temporarily disable hardware clock gating before going into
+	 * adreno_show to avoid issues while reading the registers
+	 */
+	a5xx_set_hwcg(gpu, false);
 	adreno_show(gpu, m);
+	a5xx_set_hwcg(gpu, true);
 }
 #endif
 

From b0135ab91af16be57e444e74023e48b1f379ab36 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:34 -0600
Subject: [PATCH 090/281] drm/msm: args->fence should be args->flags

Fix a typo in msm_ioctl_gem_submit - check args->flags for the
MSM_SUBMIT_NO_IMPLICIT flag instead of args->fence.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 8095658e8cb4..8a75c0bd8a78 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -451,7 +451,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) {
+	if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) {
 		ret = submit_fence_sync(submit);
 		if (ret)
 			goto out;

From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:35 -0600
Subject: [PATCH 091/281] drm/msm: Remove __user from __u64 data types

__user should be used to identify user pointers and not __u64
variables containing pointers.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 26c54f6d595d..ad4eb2863e70 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd {
 	__u32 size;           /* in, cmdstream size */
 	__u32 pad;
 	__u32 nr_relocs;      /* in, number of submit_reloc's */
-	__u64 __user relocs;  /* in, ptr to array of submit_reloc's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
 };
 
 /* Each buffer referenced elsewhere in the cmdstream submit (ie. the
@@ -215,8 +215,8 @@ struct drm_msm_gem_submit {
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-	__u64 __user bos;     /* in, ptr to array of submit_bo's */
-	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
 };
 

From 541de4c9c953438b677c31072e7762115ac11299 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 28 Jul 2017 16:17:08 +0530
Subject: [PATCH 092/281] drm/msm/adreno: Prevent unclocked access when
 retrieving timestamps

msm_gpu's get_timestamp() op (called by the MSM_GET_PARAM ioctl) can
result in register accesses. We need our power domain and clocks to
be active for that. Make sure they are enabled here.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f1ab2703674a..7414c6bbd582 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -48,8 +48,15 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 		*value = adreno_gpu->base.fast_rate;
 		return 0;
 	case MSM_PARAM_TIMESTAMP:
-		if (adreno_gpu->funcs->get_timestamp)
-			return adreno_gpu->funcs->get_timestamp(gpu, value);
+		if (adreno_gpu->funcs->get_timestamp) {
+			int ret;
+
+			pm_runtime_get_sync(&gpu->pdev->dev);
+			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
+			pm_runtime_put_autosuspend(&gpu->pdev->dev);
+
+			return ret;
+		}
 		return -EINVAL;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);

From bdab8e8b2bc89dce73b6bcabcd295806ce2e5ef9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 17:52:44 +0200
Subject: [PATCH 093/281] drm/msm: gpu: call qcom_mdt interfaces only for
 ARCH_QCOM

When compile-testing for something other than ARCH_QCOM,
we run into a link error:

drivers/gpu/drm/msm/adreno/a5xx_gpu.o: In function `a5xx_hw_init':
a5xx_gpu.c:(.text.a5xx_hw_init+0x600): undefined reference to `qcom_mdt_get_size'
a5xx_gpu.c:(.text.a5xx_hw_init+0x93c): undefined reference to `qcom_mdt_load'

There is already an #ifdef that tries to check for CONFIG_QCOM_MDT_LOADER,
but that symbol is only meaningful when building for ARCH_QCOM.

This adds a compile-time check for ARCH_QCOM, and clarifies the
Kconfig select statement so we don't even try it for other targets.

The check for CONFIG_QCOM_MDT_LOADER can then go away, which also
improves compile-time coverage and makes the code a little nicer
to read.

Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX")
Acked-by: Jordan Crouse <jcrouse@codeaurora.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/Kconfig           |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 11 +++--------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index b638d192ce5e..99d39b2aefa6 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -5,7 +5,7 @@ config DRM_MSM
 	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
 	depends on OF && COMMON_CLK
 	depends on MMU
-	select QCOM_MDT_LOADER
+	select QCOM_MDT_LOADER if ARCH_QCOM
 	select REGULATOR
 	select DRM_KMS_HELPER
 	select DRM_PANEL
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 33763b005b7b..a3393e1dc497 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -26,8 +26,6 @@ static void a5xx_dump(struct msm_gpu *gpu);
 
 #define GPU_PAS_ID 13
 
-#if IS_ENABLED(CONFIG_QCOM_MDT_LOADER)
-
 static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 {
 	const struct firmware *fw;
@@ -36,6 +34,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	void *mem_region = NULL;
 	int ret;
 
+	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
+		return -EINVAL;
+
 	/* Request the MDT file for the firmware */
 	ret = request_firmware(&fw, fwname, dev);
 	if (ret) {
@@ -73,12 +74,6 @@ out:
 
 	return ret;
 }
-#else
-static int zap_shader_load_mdt(struct device *dev, const char *fwname)
-{
-	return -ENODEV;
-}
-#endif
 
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_file_private *ctx)

From 8f93e043d048b671c32c6f0a5102fefa800c4618 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 26 Jul 2017 21:59:21 +0200
Subject: [PATCH 094/281] drm/msm: gpu: don't abuse dma_alloc for non-DMA
 allocations

In zap_shader_load_mdt(), we pass a pointer to a phys_addr_t
into dmam_alloc_coherent, which the compiler warns about:

drivers/gpu/drm/msm/adreno/a5xx_gpu.c: In function 'zap_shader_load_mdt':
drivers/gpu/drm/msm/adreno/a5xx_gpu.c:54:50: error: passing argument 3 of 'dmam_alloc_coherent' from incompatible pointer type [-Werror=incompatible-pointer-types]

The returned DMA address is later passed on to a function that
takes a phys_addr_t, so it's clearly wrong to use the DMA
mapping interface here: the memory may be uncached, or the
address may be completely wrong if there is an IOMMU connected
to the device. What the code actually wants to do is to get
the physical address from the reserved-mem node. It goes through
the dma-mapping interfaces for obscure reasons, and this
apparently only works by chance, relying on specific bugs
in the error handling of the arm64 dma-mapping implementation.

The same problem existed in the "venus" media driver, which was
now fixed by Stanimir Varbanov after long discussions.

In order to make some progress here, I have now ported his
approach over to the adreno driver. The patch is currently
untested, and should get a good review, but it is now much
simpler than the original, and it should be obvious what
goes wrong if I made a mistake in the port.

See also: a6e2d36bf6b7 ("media: venus: don't abuse dma_alloc for non-DMA allocations")
Cc: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX")
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-and-Tested-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 72 +++++++++------------------
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h |  2 -
 2 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a3393e1dc497..f9eae03aa1dc 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -15,7 +15,7 @@
 #include <linux/cpumask.h>
 #include <linux/qcom_scm.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_reserved_mem.h>
+#include <linux/of_address.h>
 #include <linux/soc/qcom/mdt_loader.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
@@ -29,6 +29,8 @@ static void a5xx_dump(struct msm_gpu *gpu);
 static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 {
 	const struct firmware *fw;
+	struct device_node *np;
+	struct resource r;
 	phys_addr_t mem_phys;
 	ssize_t mem_size;
 	void *mem_region = NULL;
@@ -37,6 +39,21 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
 		return -EINVAL;
 
+	np = of_get_child_by_name(dev->of_node, "zap-shader");
+	if (!np)
+		return -ENODEV;
+
+	np = of_parse_phandle(np, "memory-region", 0);
+	if (!np)
+		return -EINVAL;
+
+	ret = of_address_to_resource(np, 0, &r);
+	if (ret)
+		return ret;
+
+	mem_phys = r.start;
+	mem_size = resource_size(&r);
+
 	/* Request the MDT file for the firmware */
 	ret = request_firmware(&fw, fwname, dev);
 	if (ret) {
@@ -52,7 +69,7 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 	}
 
 	/* Allocate memory for the firmware image */
-	mem_region = dmam_alloc_coherent(dev, mem_size, &mem_phys, GFP_KERNEL);
+	mem_region = memremap(mem_phys, mem_size,  MEMREMAP_WC);
 	if (!mem_region) {
 		ret = -ENOMEM;
 		goto out;
@@ -70,6 +87,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
 		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
 
 out:
+	if (mem_region)
+		memunmap(mem_region);
+
 	release_firmware(fw);
 
 	return ret;
@@ -348,45 +368,6 @@ static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 	return ret;
 }
 
-/* Set up a child device to "own" the zap shader */
-static int a5xx_zap_shader_dev_init(struct device *parent, struct device *dev)
-{
-	struct device_node *node;
-	int ret;
-
-	if (dev->parent)
-		return 0;
-
-	/* Find the sub-node for the zap shader */
-	node = of_get_child_by_name(parent->of_node, "zap-shader");
-	if (!node) {
-		DRM_DEV_ERROR(parent, "zap-shader not found in device tree\n");
-		return -ENODEV;
-	}
-
-	dev->parent = parent;
-	dev->of_node = node;
-	dev_set_name(dev, "adreno_zap_shader");
-
-	ret = device_register(dev);
-	if (ret) {
-		DRM_DEV_ERROR(parent, "Couldn't register zap shader device\n");
-		goto out;
-	}
-
-	ret = of_reserved_mem_device_init(dev);
-	if (ret) {
-		DRM_DEV_ERROR(parent, "Unable to set up the reserved memory\n");
-		device_unregister(dev);
-	}
-
-out:
-	if (ret)
-		dev->parent = NULL;
-
-	return ret;
-}
-
 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 {
 	static bool loaded;
@@ -415,11 +396,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 		return -ENODEV;
 	}
 
-	ret = a5xx_zap_shader_dev_init(&pdev->dev, &a5xx_gpu->zap_dev);
-
-	if (!ret)
-		ret = zap_shader_load_mdt(&a5xx_gpu->zap_dev,
-			adreno_gpu->info->zapfw);
+	ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw);
 
 	loaded = !ret;
 
@@ -662,9 +639,6 @@ static void a5xx_destroy(struct msm_gpu *gpu)
 
 	DBG("%s", gpu->name);
 
-	if (a5xx_gpu->zap_dev.parent)
-		device_unregister(&a5xx_gpu->zap_dev);
-
 	if (a5xx_gpu->pm4_bo) {
 		if (a5xx_gpu->pm4_iova)
 			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index d24796f3a706..1137092241d5 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -36,8 +36,6 @@ struct a5xx_gpu {
 	uint32_t gpmu_dwords;
 
 	uint32_t lm_leakage;
-
-	struct device zap_dev;
 };
 
 #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)

From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 1 Aug 2017 20:11:02 -0500
Subject: [PATCH 095/281] PCI: Add pci_reset_function_locked()

The implementation of PCI workarounds may require that the device is reset
from its probe function.  This implies that the PCI device lock is already
held, and makes calling pci_reset_function() impossible (since it will
itself try to take that lock).

Add pci_reset_function_locked(), which is the equivalent of
pci_reset_function(), except that it requires the PCI device lock to be
already held by the caller.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
[bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove
__pci_dev_reset() and pci_dev_reset()")]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org	# 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset()
Cc: stable@vger.kernel.org	# 4.11
---
 drivers/pci/pci.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc3456dc1..b4b7eab29400 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_reset_function);
 
+/**
+ * pci_reset_function_locked - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device.  This function differs
+ * from __pci_reset_function() in that it saves and restores device state
+ * over the reset.  It also differs from pci_reset_function() in that it
+ * requires the PCI device lock to be held.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function_locked(struct pci_dev *dev)
+{
+	int rc;
+
+	rc = pci_probe_reset_function(dev);
+	if (rc)
+		return rc;
+
+	pci_dev_save_and_disable(dev);
+
+	rc = __pci_reset_function_locked(dev);
+
+	pci_dev_restore(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function_locked);
+
 /**
  * pci_try_reset_function - quiesce and reset a PCI device function
  * @dev: PCI device to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66dd659..a75c13673852 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
+int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_reset_slot(struct pci_slot *slot);

From 6184cc8ddbb318758a000da68c5285fc2dd74338 Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Tue, 1 Aug 2017 17:47:25 +0800
Subject: [PATCH 096/281] drm/i915/gvt: change resetting to resetting_eng

Use resetting_eng to identify which engine is resetting
so the rest ones' workload won't be impacted

v2:
- use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu)

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c  | 10 +++++-----
 drivers/gpu/drm/i915/gvt/gvt.h       |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c |  3 ++-
 drivers/gpu/drm/i915/gvt/vgpu.c      |  8 +++++---
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 700050556242..cac669b2b320 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -499,10 +499,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_vgpu_execlist *execlist =
-		&vgpu->execlist[workload->ring_id];
+	int ring_id = workload->ring_id;
+	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
 	struct intel_vgpu_workload *next_workload;
-	struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
+	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
 	bool lite_restore = false;
 	int ret;
 
@@ -512,10 +512,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
-	if (workload->status || vgpu->resetting)
+	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
 		goto out;
 
-	if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
+	if (!list_empty(workload_q_head(vgpu, ring_id))) {
 		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
 
 		next_workload = container_of(next,
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 3a74e79eac2f..d96c41aa5aa7 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -149,7 +149,7 @@ struct intel_vgpu {
 	bool active;
 	bool pv_notified;
 	bool failsafe;
-	bool resetting;
+	unsigned int resetting_eng;
 	void *sched_data;
 	struct vgpu_sched_ctl sched_ctl;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 4f7057d62d88..22e08eb2d0b7 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -432,7 +432,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 
 		i915_gem_request_put(fetch_and_zero(&workload->req));
 
-		if (!workload->status && !vgpu->resetting) {
+		if (!workload->status && !(vgpu->resetting_eng &
+					   ENGINE_MASK(ring_id))) {
 			update_guest_context(workload);
 
 			for_each_set_bit(event, workload->pending_events,
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 90c14e6e3ea0..3deadcbd5a24 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -480,11 +480,13 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	unsigned int resetting_eng = dmlr ? ALL_ENGINES : engine_mask;
 
 	gvt_dbg_core("------------------------------------------\n");
 	gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n",
 		     vgpu->id, dmlr, engine_mask);
-	vgpu->resetting = true;
+
+	vgpu->resetting_eng = resetting_eng;
 
 	intel_vgpu_stop_schedule(vgpu);
 	/*
@@ -497,7 +499,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		mutex_lock(&gvt->lock);
 	}
 
-	intel_vgpu_reset_execlist(vgpu, dmlr ? ALL_ENGINES : engine_mask);
+	intel_vgpu_reset_execlist(vgpu, resetting_eng);
 
 	/* full GPU reset or device model level reset */
 	if (engine_mask == ALL_ENGINES || dmlr) {
@@ -520,7 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 		}
 	}
 
-	vgpu->resetting = false;
+	vgpu->resetting_eng = 0;
 	gvt_dbg_core("reset vgpu%d done\n", vgpu->id);
 	gvt_dbg_core("------------------------------------------\n");
 }

From f2e2c00adcdc59b9fe86c82259abdaf32d0ee6ea Mon Sep 17 00:00:00 2001
From: Chuanxiao Dong <chuanxiao.dong@intel.com>
Date: Tue, 1 Aug 2017 17:47:26 +0800
Subject: [PATCH 097/281] drm/i915/gvt: clean workload queue if error happened

If a workload caused a HW GPU hang or it is in the middle of
vGPU reset, the workload queue should be cleaned up to emulate
the hang state of the GPU.

v2:
- use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu)

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index cac669b2b320..1648887d3f55 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -46,6 +46,8 @@
 #define same_context(a, b) (((a)->context_id == (b)->context_id) && \
 		((a)->lrca == (b)->lrca))
 
+static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask);
+
 static int context_switch_events[] = {
 	[RCS] = RCS_AS_CONTEXT_SWITCH,
 	[BCS] = BCS_AS_CONTEXT_SWITCH,
@@ -512,8 +514,23 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	release_shadow_batch_buffer(workload);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 
-	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
+	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
+		/* if workload->status is not successful means HW GPU
+		 * has occurred GPU hang or something wrong with i915/GVT,
+		 * and GVT won't inject context switch interrupt to guest.
+		 * So this error is a vGPU hang actually to the guest.
+		 * According to this we should emunlate a vGPU hang. If
+		 * there are pending workloads which are already submitted
+		 * from guest, we should clean them up like HW GPU does.
+		 *
+		 * if it is in middle of engine resetting, the pending
+		 * workloads won't be submitted to HW GPU and will be
+		 * cleaned up during the resetting process later, so doing
+		 * the workload clean up here doesn't have any impact.
+		 **/
+		clean_workloads(vgpu, ENGINE_MASK(ring_id));
 		goto out;
+	}
 
 	if (!list_empty(workload_q_head(vgpu, ring_id))) {
 		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

From e43c9f23efadade684773a855675c99da278c862 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:06 -0700
Subject: [PATCH 098/281] b44: Initialize 64-bit stats seqcount

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that.

Fixes: eeda8585522b ("b44: add 64 bit stats")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/b44.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index f411936b744c..a1125d10c825 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2368,6 +2368,7 @@ static int b44_init_one(struct ssb_device *sdev,
 	bp->msg_enable = netif_msg_init(b44_debug, B44_DEF_MSG_ENABLE);
 
 	spin_lock_init(&bp->lock);
+	u64_stats_init(&bp->hw_stats.syncp);
 
 	bp->rx_pending = B44_DEF_RX_RING_PENDING;
 	bp->tx_pending = B44_DEF_TX_RING_PENDING;

From 7d6d067790289e4f61f59fa60550ca5918aa25bd Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:07 -0700
Subject: [PATCH 099/281] i40e: Initialize 64-bit statistics TX ring seqcount

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that.

Fixes: 980e9b118642 ("i40e: Add support for 64 bit netstats")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b936febc315a..2194960d5855 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1113,6 +1113,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 	if (!tx_ring->tx_bi)
 		goto err;
 
+	u64_stats_init(&tx_ring->syncp);
+
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
 	/* add u32 for head writeback, align after this takes care of

From 7c3a4626eb65e78ebe208f48ffa21a5002f7f38e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:08 -0700
Subject: [PATCH 100/281] ixgbe: Initialize 64-bit stats seqcounts

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that.

Fixes: 4197aa7bb818 ("ixgbevf: provide 64 bit statistics")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 084c53582793..032f8ac06357 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2988,6 +2988,8 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
 	if (!tx_ring->tx_buffer_info)
 		goto err;
 
+	u64_stats_init(&tx_ring->syncp);
+
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
@@ -3046,6 +3048,8 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
 	if (!rx_ring->rx_buffer_info)
 		goto err;
 
+	u64_stats_init(&rx_ring->syncp);
+
 	/* Round up to nearest 4K */
 	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
 	rx_ring->size = ALIGN(rx_ring->size, 4096);

From 7ceb781a1aa0356086e2bdc76bcd953fcb7ac377 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:09 -0700
Subject: [PATCH 101/281] nfp: Initialize RX and TX ring 64-bit stats seqcounts

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that.

Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 18750ff0ede6..4631ca8b8eb2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -513,6 +513,7 @@ nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
 	tx_ring->idx = idx;
 	tx_ring->r_vec = r_vec;
 	tx_ring->is_xdp = is_xdp;
+	u64_stats_init(&tx_ring->r_vec->tx_sync);
 
 	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
 	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
@@ -532,6 +533,7 @@ nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
 
 	rx_ring->idx = idx;
 	rx_ring->r_vec = r_vec;
+	u64_stats_init(&rx_ring->r_vec->rx_sync);
 
 	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
 	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);

From 790cb2ebb3f9c5d26a320117d5d13cafe479484d Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:10 -0700
Subject: [PATCH 102/281] gtp: Initialize 64-bit per-cpu stats correctly

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that by using netdev_alloc_pcpu_stats() instead of an open coded
allocation.

Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 1542e837fdfa..f38e32a7ec9c 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -364,7 +364,7 @@ static int gtp_dev_init(struct net_device *dev)
 
 	gtp->dev = dev;
 
-	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
 

From 4a0dee1ffe0e8f4101e704a325e97f8997b0abcc Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:12 -0700
Subject: [PATCH 103/281] netvsc: Initialize 64-bit stats seqcount

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that. In commit 6c80f3fc2398 ("netvsc: report per-channel stats in
ethtool statistics") netdev_alloc_pcpu_stats() was removed in favor of
open-coding the 64-bits statistics, except that u64_stats_init() was
missed.

Fixes: 6c80f3fc2398 ("netvsc: report per-channel stats in ethtool statistics")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0a9167dd72fb..96f90c75d1b7 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1302,6 +1302,8 @@ int netvsc_device_add(struct hv_device *device,
 		struct netvsc_channel *nvchan = &net_device->chan_table[i];
 
 		nvchan->channel = device->channel;
+		u64_stats_init(&nvchan->tx_stats.syncp);
+		u64_stats_init(&nvchan->rx_stats.syncp);
 	}
 
 	/* Enable NAPI handler before init callbacks */

From 87173cd6cf242f2340db187d82bd47a48fe5e5fe Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 1 Aug 2017 12:11:13 -0700
Subject: [PATCH 104/281] ipvlan: Fix 64-bit statistics seqcount initialization

On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a
lockdep splat indicating this seqcount is not correctly initialized, fix
that by using the proper helper function: netdev_alloc_pcpu_stats().

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index f37e3c1fd4e7..8dab74a81303 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -192,7 +192,7 @@ static int ipvlan_init(struct net_device *dev)
 
 	netdev_lockdep_set_classes(dev);
 
-	ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats);
+	ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
 	if (!ipvlan->pcpu_stats)
 		return -ENOMEM;
 

From f7f8c1756e9a5f1258a7cc6b663f8451b724900f Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@free-electrons.com>
Date: Wed, 5 Jul 2017 08:51:09 +0200
Subject: [PATCH 105/281] nand: fix wrong default oob layout for small pages
 using soft ecc

When using soft ecc, if no ooblayout is given, the core automatically
uses one of the nand_ooblayout_{sp,lp}*() functions to determine the
layout inside the out of band data.

Until kernel version 4.6, struct nand_ecclayout was used for that
purpose. During the migration from 4.6 to 4.7, an error shown up in the
small page layout, in the case oob section is only 8 bytes long.

The layout was using three bytes (0, 1, 2) for ecc, two bytes (3, 4)
as free bytes, one byte (5) for bad block marker and finally
two bytes (6, 7) as free bytes, as shown there:

[linux-4.6] drivers/mtd/nand/nand_base.c:52
static struct nand_ecclayout nand_oob_8 = {
	.eccbytes = 3,
	.eccpos = {0, 1, 2},
	.oobfree = {
		{.offset = 3,
		 .length = 2},
		{.offset = 6,
		 .length = 2} }
};

This fixes the current implementation which is incoherent. It
references bit 3 at the same time as an ecc byte and a free byte.

Furthermore, it is clear with the previous implementation that there
is only one ecc section with 8 bytes oob sections. We shall return
-ERANGE in the nand_ooblayout_ecc_sp() function when asked for the
second section.

Signed-off-by: Miquel Raynal <miquel.raynal@free-electrons.com>
Fixes: 41b207a70d3a ("mtd: nand: implement the default mtd_ooblayout_ops")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 5fa5ddc94834..7b3826b42447 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -65,8 +65,14 @@ static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section,
 
 	if (!section) {
 		oobregion->offset = 0;
-		oobregion->length = 4;
+		if (mtd->oobsize == 16)
+			oobregion->length = 4;
+		else
+			oobregion->length = 3;
 	} else {
+		if (mtd->oobsize == 8)
+			return -ERANGE;
+
 		oobregion->offset = 6;
 		oobregion->length = ecc->total - 4;
 	}

From 791eccd94965a8029ae09c5530bcb9a76794e408 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Date: Fri, 28 Jul 2017 14:22:57 +0100
Subject: [PATCH 106/281] mtd: nand: sunxi: fix potential divide-by-zero error

clk_round_rate() can return <= 0. Currently the value returned by
clk_round_rate() is used directly for a division. This patch introduces a
guard to ensure a divide-by-zero or a divide by a negative number for that
matter can't happen by bugging out returning -EINVAL if clk_round_rate()
returns <= 0.

Fixes: 2d43457f79e4 ("mtd: nand: sunxi: fix EDO mode selection")
Signed-off-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/sunxi_nand.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index d0b6f8f9f297..6abd142b1324 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1728,6 +1728,10 @@ static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
 	 */
 	chip->clk_rate = NSEC_PER_SEC / min_clk_period;
 	real_clk_rate = clk_round_rate(nfc->mod_clk, chip->clk_rate);
+	if (real_clk_rate <= 0) {
+		dev_err(nfc->dev, "Unable to round clk %lu\n", chip->clk_rate);
+		return -EINVAL;
+	}
 
 	/*
 	 * ONFI specification 3.1, paragraph 4.15.2 dictates that EDO data

From cb25fae18207330caac848d850e11f9cdb500dbf Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Sun, 30 Jul 2017 16:18:03 -0600
Subject: [PATCH 107/281] mtd: nand: Fix a docs build warning

Commit 0b4773fd1649 (mtd: nand: Drop unused cached programming support)
removed the "cached" parameter from nand_write_page(), but did not update
the kerneldoc comments, creating this docs build warning:

  ./drivers/mtd/nand/nand_base.c:2751: warning: Excess function parameter 'cached' description in 'nand_write_page'

Remove the offending line so we can have a little peace and quiet.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 7b3826b42447..5b7c8d05360f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2747,7 +2747,6 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
  * @buf: the data to write
  * @oob_required: must write chip->oob_poi to OOB
  * @page: page number to write
- * @cached: cached programming
  * @raw: use _raw version of write_page
  */
 static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,

From a11bf5ed951f8900d244d09eb03a888b59c7fc82 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 31 Jul 2017 10:29:56 +0200
Subject: [PATCH 108/281] mtd: nand: Fix timing setup for NANDs that do not
 support SET FEATURES

Some ONFI NANDs do not support the SET/GET FEATURES commands, which,
according to the spec, is perfectly valid.

On these NANDs we can't set a specific timing mode using the "timing
mode" feature, and we should assume the NAND does not require any setup
to enter a specific timing mode.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: d8e725dd8311 ("mtd: nand: automate NAND timings selection")
Reported-by: Alexander Dahl <ada@thorsis.com>
Cc: <stable@vger.kernel.org>
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 5b7c8d05360f..c6c18b82f8f4 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1131,7 +1131,9 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 	 * Ensure the timing mode has been changed on the chip side
 	 * before changing timings on the controller side.
 	 */
-	if (chip->onfi_version) {
+	if (chip->onfi_version &&
+	    (le16_to_cpu(chip->onfi_params.opt_cmd) &
+	     ONFI_OPT_CMD_SET_GET_FEATURES)) {
 		u8 tmode_param[ONFI_SUBFEATURE_PARAM_LEN] = {
 			chip->onfi_timing_mode_default,
 		};

From 6d29231000bbe0fb9e4893a9c68151ffdd3b5469 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 31 Jul 2017 10:31:27 +0200
Subject: [PATCH 109/281] mtd: nand: Declare tBERS, tR and tPROG as u64 to
 avoid integer overflow

All timings in nand_sdr_timings are expressed in picoseconds but some
of them may not fit in an u32.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: 204e7ecd47e2 ("mtd: nand: Add a few more timings to nand_sdr_timings")
Reported-by: Alexander Dahl <ada@thorsis.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_timings.c | 6 +++---
 include/linux/mtd/nand.h        | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c
index f06312df3669..7e36d7d13c26 100644
--- a/drivers/mtd/nand/nand_timings.c
+++ b/drivers/mtd/nand/nand_timings.c
@@ -311,9 +311,9 @@ int onfi_init_data_interface(struct nand_chip *chip,
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 
 		/* microseconds -> picoseconds */
-		timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog);
-		timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers);
-		timings->tR_max = 1000000UL * le16_to_cpu(params->t_r);
+		timings->tPROG_max = 1000000ULL * le16_to_cpu(params->t_prog);
+		timings->tBERS_max = 1000000ULL * le16_to_cpu(params->t_bers);
+		timings->tR_max = 1000000ULL * le16_to_cpu(params->t_r);
 
 		/* nanoseconds -> picoseconds */
 		timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 892148c448cc..5216d2eb2289 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -681,10 +681,10 @@ struct nand_buffers {
  * @tWW_min: WP# transition to WE# low
  */
 struct nand_sdr_timings {
-	u32 tBERS_max;
+	u64 tBERS_max;
 	u32 tCCS_min;
-	u32 tPROG_max;
-	u32 tR_max;
+	u64 tPROG_max;
+	u64 tR_max;
 	u32 tALH_min;
 	u32 tADL_min;
 	u32 tALS_min;

From ee02f73e04c0e690600f621a3a1d2245834af7fe Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 31 Jul 2017 10:32:21 +0200
Subject: [PATCH 110/281] mtd: nand: atmel: Fix EDO mode check

EDO mode should be used when tRC is less than 30ns, but timings are
expressed in picoseconds in the nand_sdr_timings struct.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks")
Reported-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index d922a88e407f..2c8baa0c2c4e 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -1201,7 +1201,7 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
 	 * tRC < 30ns implies EDO mode. This controller does not support this
 	 * mode.
 	 */
-	if (conf->timings.sdr.tRC_min < 30)
+	if (conf->timings.sdr.tRC_min < 30000)
 		return -ENOTSUPP;
 
 	atmel_smc_cs_conf_init(smcconf);

From 1ad43c0078b79a76accd0fe64062e47b3430dc6b Mon Sep 17 00:00:00 2001
From: Ming Lei <minlei@redhat.com>
Date: Wed, 2 Aug 2017 08:01:45 +0800
Subject: [PATCH 111/281] blk-mq: don't leak preempt counter/q_usage_counter
 when allocating rq failed

When blk_mq_get_request() failed, preempt counter isn't
released, and blk_mq_make_request() doesn't release the counter
too.

This patch fixes the issue, and makes sure that preempt counter
is only held if rq is allocated successfully. The same policy is
applied on .q_usage_counter too.

Signed-off-by: Ming Lei <minlei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 041f7b7fa0d6..211ef367345f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -301,11 +301,12 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	struct elevator_queue *e = q->elevator;
 	struct request *rq;
 	unsigned int tag;
+	struct blk_mq_ctx *local_ctx = NULL;
 
 	blk_queue_enter_live(q);
 	data->q = q;
 	if (likely(!data->ctx))
-		data->ctx = blk_mq_get_ctx(q);
+		data->ctx = local_ctx = blk_mq_get_ctx(q);
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 	if (op & REQ_NOWAIT)
@@ -324,6 +325,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 
 	tag = blk_mq_get_tag(data);
 	if (tag == BLK_MQ_TAG_FAIL) {
+		if (local_ctx) {
+			blk_mq_put_ctx(local_ctx);
+			data->ctx = NULL;
+		}
 		blk_queue_exit(q);
 		return NULL;
 	}
@@ -356,12 +361,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
-	blk_mq_put_ctx(alloc_data.ctx);
-	blk_queue_exit(q);
-
 	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
 
+	blk_mq_put_ctx(alloc_data.ctx);
+	blk_queue_exit(q);
+
 	rq->__data_len = 0;
 	rq->__sector = (sector_t) -1;
 	rq->bio = rq->biotail = NULL;
@@ -407,11 +412,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
-	blk_queue_exit(q);
-
 	if (!rq)
 		return ERR_PTR(-EWOULDBLOCK);
 
+	blk_queue_exit(q);
+
 	return rq;
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

From 8466489ef5ba48272ba4fa4ea9f8f403306de4c7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 1 Aug 2017 20:11:08 -0500
Subject: [PATCH 112/281] xhci: Reset Renesas uPD72020x USB controller for
 32-bit DMA issue

The Renesas uPD72020x XHCI controller seems to suffer from a really
annoying bug, where it may retain some of its DMA programming across a XHCI
reset, and despite the driver correctly programming new DMA addresses.
This is visible if the device has been using 64-bit DMA addresses, and is
then switched to using 32-bit DMA addresses.  The top 32 bits of the
address (now zero) are ignored are replaced by the 32 bits from the
*previous* programming.  Sticking with 64-bit DMA always works, but doesn't
seem very appropriate.

A PCI reset of the device restores the normal functionality, which is done
at probe time.  Unfortunately, this has to be done before any quirk has
been discovered, hence the intrusive nature of the fix.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
CC: stable@vger.kernel.org	# v4.11+
---
 drivers/usb/host/pci-quirks.c | 20 ++++++++++++++++++++
 drivers/usb/host/pci-quirks.h |  1 +
 drivers/usb/host/xhci-pci.c   |  7 +++++++
 3 files changed, 28 insertions(+)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index c8989c62a262..858fefd67ebe 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -1150,3 +1150,23 @@ static void quirk_usb_early_handoff(struct pci_dev *pdev)
 }
 DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
 			PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff);
+
+bool usb_xhci_needs_pci_reset(struct pci_dev *pdev)
+{
+	/*
+	 * Our dear uPD72020{1,2} friend only partially resets when
+	 * asked to via the XHCI interface, and may end up doing DMA
+	 * at the wrong addresses, as it keeps the top 32bit of some
+	 * addresses from its previous programming under obscure
+	 * circumstances.
+	 * Give it a good wack at probe time. Unfortunately, this
+	 * needs to happen before we've had a chance to discover any
+	 * quirk, or the system will be in a rather bad state.
+	 */
+	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+	    (pdev->device == 0x0014 || pdev->device == 0x0015))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(usb_xhci_needs_pci_reset);
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 655994480198..5582cbafecd4 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -15,6 +15,7 @@ void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev);
 void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
+bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
 #else
 struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5b0fa553c8bc..8071c8fdd15e 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -284,6 +284,13 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	driver = (struct hc_driver *)id->driver_data;
 
+	/* For some HW implementation, a XHCI reset is just not enough... */
+	if (usb_xhci_needs_pci_reset(dev)) {
+		dev_info(&dev->dev, "Resetting\n");
+		if (pci_reset_function_locked(dev))
+			dev_warn(&dev->dev, "Reset failed");
+	}
+
 	/* Prevent runtime suspending between USB-2 and USB-3 initialization */
 	pm_runtime_get_noresume(&dev->dev);
 

From fb52c3b597f0b44e893ded8c253845561f9f57da Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Tue, 1 Aug 2017 10:24:17 +0000
Subject: [PATCH 113/281] lan78xx: USB fast connect/disconnect crash fix

USB fast connect/disconnect crash fix

When USB plugged/unplugged at fast rate,
lan78xx_mdio_init() in lan78xx_bind() failing case is not handled.
Whenever  lan78xx_mdio_init() failed, dev->mdiobus will be freed, however
since lan78xx_bind() not consider as error and try to proceed for
further initialization in lan78xx_probe() which leads system hung/crash.
Also when register_netdev() failed, netdev is freed without calling lan78xx_unbind().
Hence halting the failed cases right manner fixes the system crash/hung issue.

Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 5833f7e2a127..8ef3639649a0 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2858,13 +2858,13 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 	/* Init all registers */
 	ret = lan78xx_reset(dev);
 
-	lan78xx_mdio_init(dev);
+	ret = lan78xx_mdio_init(dev);
 
 	dev->net->flags |= IFF_MULTICAST;
 
 	pdata->wol = WAKE_MAGIC;
 
-	return 0;
+	return ret;
 }
 
 static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf)
@@ -3525,11 +3525,11 @@ static int lan78xx_probe(struct usb_interface *intf,
 	udev = interface_to_usbdev(intf);
 	udev = usb_get_dev(udev);
 
-	ret = -ENOMEM;
 	netdev = alloc_etherdev(sizeof(struct lan78xx_net));
 	if (!netdev) {
-			dev_err(&intf->dev, "Error: OOM\n");
-			goto out1;
+		dev_err(&intf->dev, "Error: OOM\n");
+		ret = -ENOMEM;
+		goto out1;
 	}
 
 	/* netdev_printk() needs this */
@@ -3610,7 +3610,7 @@ static int lan78xx_probe(struct usb_interface *intf,
 	ret = register_netdev(netdev);
 	if (ret != 0) {
 		netif_err(dev, probe, netdev, "couldn't register the device\n");
-		goto out2;
+		goto out3;
 	}
 
 	usb_set_intfdata(intf, dev);

From 0573f94b1abfb42952275ca67f665dbd720b00d7 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Tue, 1 Aug 2017 10:24:33 +0000
Subject: [PATCH 114/281] lan78xx: Fix to handle hard_header_len update

Fix to handle hard_header_len update

When ifconfig up/down sequence is initiated hard_header_len
get updated incrementally for each ifconfig up /down sequence,
this leads invalid hard_header_len, moving to lan78xx_bind
to have one time update of hard_header_len addresses the issue.

Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 8ef3639649a0..b99a7fb09f8e 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2367,9 +2367,6 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	/* Init LTM */
 	lan78xx_init_ltm(dev);
 
-	dev->net->hard_header_len += TX_OVERHEAD;
-	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
-
 	if (dev->udev->speed == USB_SPEED_SUPER) {
 		buf = DEFAULT_BURST_CAP_SIZE / SS_USB_PKT_SIZE;
 		dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
@@ -2855,6 +2852,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 		return ret;
 	}
 
+	dev->net->hard_header_len += TX_OVERHEAD;
+	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
+
 	/* Init all registers */
 	ret = lan78xx_reset(dev);
 

From c994f778bb1cca8ebe7a4e528cefec233e93b5cc Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Tue, 1 Aug 2017 16:43:43 +0300
Subject: [PATCH 115/281] net/mlx4_en: Fix wrong indication of Wake-on-LAN
 (WoL) support

Currently when WoL is supported but disabled, ethtool reports:
"Supports Wake-on: d".
Fix the indication of Wol support, so that the indication
remains "g" all the time if the NIC supports WoL.

Tested:
As accepted, when NIC supports WoL- ethtool reports:
	Supports Wake-on: g
	Wake-on: d
when NIC doesn't support WoL- ethtool reports:
        Supports Wake-on: d
        Wake-on: d

Fixes: 14c07b1358ed ("mlx4: Wake on LAN support")
Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 15 ++++++++-------
 drivers/net/ethernet/mellanox/mlx4/fw.c         |  4 ++++
 drivers/net/ethernet/mellanox/mlx4/fw.h         |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c       |  2 ++
 include/linux/mlx4/device.h                     |  1 +
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index c751a1d434ad..3d4e4a5d00d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 			    struct ethtool_wolinfo *wol)
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct mlx4_caps *caps = &priv->mdev->dev->caps;
 	int err = 0;
 	u64 config = 0;
 	u64 mask;
@@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
 		MLX4_DEV_CAP_FLAG_WOL_PORT2;
 
-	if (!(priv->mdev->dev->caps.flags & mask)) {
+	if (!(caps->flags & mask)) {
 		wol->supported = 0;
 		wol->wolopts = 0;
 		return;
 	}
 
+	if (caps->wol_port[priv->port])
+		wol->supported = WAKE_MAGIC;
+	else
+		wol->supported = 0;
+
 	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
 	if (err) {
 		en_err(priv, "Failed to get WoL information\n");
 		return;
 	}
 
-	if (config & MLX4_EN_WOL_MAGIC)
-		wol->supported = WAKE_MAGIC;
-	else
-		wol->supported = 0;
-
-	if (config & MLX4_EN_WOL_ENABLED)
+	if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC))
 		wol->wolopts = WAKE_MAGIC;
 	else
 		wol->wolopts = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 37e84a59e751..c165f16623a9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -764,6 +764,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET	0x3e
 #define QUERY_DEV_CAP_MAX_PKEY_OFFSET		0x3f
 #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET		0x40
+#define QUERY_DEV_CAP_WOL_OFFSET		0x43
 #define QUERY_DEV_CAP_FLAGS_OFFSET		0x44
 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET		0x48
 #define QUERY_DEV_CAP_UAR_SZ_OFFSET		0x49
@@ -920,6 +921,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
 	MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
 	dev_cap->flags = flags | (u64)ext_flags << 32;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET);
+	dev_cap->wol_port[1] = !!(field & 0x20);
+	dev_cap->wol_port[2] = !!(field & 0x40);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
 	dev_cap->reserved_uars = field >> 4;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 5343a0599253..b52ba01aa486 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -129,6 +129,7 @@ struct mlx4_dev_cap {
 	u32 dmfs_high_rate_qpn_range;
 	struct mlx4_rate_limit_caps rl_caps;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
+	bool wol_port[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_func_cap {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a27c9c13a36e..09b9bc17bce9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -424,6 +424,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
+	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
+	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
 
 	/* Save uar page shift */
 	if (!mlx4_is_slave(dev)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index aad5d81dfb44..b54517c05e9a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -620,6 +620,7 @@ struct mlx4_caps {
 	u32			dmfs_high_rate_qpn_base;
 	u32			dmfs_high_rate_qpn_range;
 	u32			vf_caps;
+	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
 };
 

From 5886259c127026ee4a6a7093d6c320440d833fda Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 1 Aug 2017 16:43:44 +0300
Subject: [PATCH 116/281] net/mlx4_core: Fix sl_to_vl_change bit offset in
 flags2 dump

The index value in function dump_dev_cap_flags2() for outputting
"sl to vl mapping table change event support" needs to be
consistent with the value of the enumerated constant
MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT defined in file
include/linux/mlx4_device.h

The change here restores that consistency.

Fixes: fd10ed8e6f42 ("IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets")
Reported-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index c165f16623a9..009dd03466d6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -160,7 +160,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[33] = "RoCEv2 support",
 		[34] = "DMFS Sniffer support (UC & MC)",
 		[35] = "QinQ VST mode support",
-		[36] = "sl to vl mapping table change event support"
+		[37] = "sl to vl mapping table change event support",
 	};
 	int i;
 

From f9fb9d0b8519a795eeaca5108205593b66068cce Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 1 Aug 2017 16:43:45 +0300
Subject: [PATCH 117/281] net/mlx4_core: Fix namespace misalignment in QinQ VST
 support commit

The cited commit introduced the following new enum value in file
include/linux/mlx4/device.h:

    MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP

However the value of MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP needs to stay
consistent with the value used in another namespace in
function dump_dev_cap_flags2(), which is manually kept in sync.
The change here restores that consistency.

Fixes: 7c3d21c8153c ("net/mlx4_core: Preparation for VF vlan protocol 802.1ad")
Reported-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 009dd03466d6..7c9502bae1cc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -159,7 +159,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[32] = "Loopback source checks support",
 		[33] = "RoCEv2 support",
 		[34] = "DMFS Sniffer support (UC & MC)",
-		[35] = "QinQ VST mode support",
+		[36] = "QinQ VST mode support",
 		[37] = "sl to vl mapping table change event support",
 	};
 	int i;

From bff0c6840c135c14675404ba697aae2638c37cb6 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 1 Aug 2017 16:43:46 +0300
Subject: [PATCH 118/281] net/mlx4_core: Fixes missing capability bit in flags2
 capability dump

The cited commit introduced the following new enum value in file
include/linux/mlx4/device.h:

    QUERY_DEV_CAP_DIAG_RPRT_PER_PORT

However, it failed to introduce a corresponding entry in function
dump_dev_cap_flags2() for outputting a line in the message log
when this capability bit is set.

The change here fixes that omission.

Fixes: c7c122ed67e4 ("net/mlx4: Add diagnostic counters capability bit")
Reported-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 7c9502bae1cc..041c0ed65929 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -159,6 +159,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[32] = "Loopback source checks support",
 		[33] = "RoCEv2 support",
 		[34] = "DMFS Sniffer support (UC & MC)",
+		[35] = "Diag counters per port",
 		[36] = "QinQ VST mode support",
 		[37] = "sl to vl mapping table change event support",
 	};

From bed9ff165960921303a100228585f2d1691b42eb Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Wed, 2 Aug 2017 00:45:44 +0900
Subject: [PATCH 119/281] usb: qmi_wwan: add D-Link DWM-222 device ID

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 5894e3c9468f..ff6f39fe6c00 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1175,6 +1175,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
 	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
+	{QMI_FIXED_INTF(0x2001, 0x7e35, 4)},	/* D-Link DWM-222 */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */
 	{QMI_FIXED_INTF(0x1199, 0x68a2, 8)},	/* Sierra Wireless MC7710 in QMI mode */

From 4d96f12a072c669d48dc3a2c6b539a9faeca138d Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Tue, 1 Aug 2017 15:04:36 -0500
Subject: [PATCH 120/281] ibmvnic: Initialize SCRQ's during login renegotiation

SCRQ resources are freed during renegotiation, but they are not
re-allocated afterwards due to some changes in the initialization
process. Fix that by re-allocating the memory after renegotation.

SCRQ's can also be freed if a server capabilities request fails.
If this were encountered during a device reset for example,
SCRQ's may not be re-allocated. This operation is not necessary
anymore so remove it.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index a3e694679635..c45e8e3b82d3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -111,6 +111,7 @@ static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
 static void send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
+static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
@@ -651,6 +652,7 @@ static int ibmvnic_login(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	unsigned long timeout = msecs_to_jiffies(30000);
 	struct device *dev = &adapter->vdev->dev;
+	int rc;
 
 	do {
 		if (adapter->renegotiate) {
@@ -664,6 +666,18 @@ static int ibmvnic_login(struct net_device *netdev)
 				dev_err(dev, "Capabilities query timeout\n");
 				return -1;
 			}
+			rc = init_sub_crqs(adapter);
+			if (rc) {
+				dev_err(dev,
+					"Initialization of SCRQ's failed\n");
+				return -1;
+			}
+			rc = init_sub_crq_irqs(adapter);
+			if (rc) {
+				dev_err(dev,
+					"Initialization of SCRQ's irqs failed\n");
+				return -1;
+			}
 		}
 
 		reinit_completion(&adapter->init_done);
@@ -3004,7 +3018,6 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
 			 *req_value,
 			 (long int)be64_to_cpu(crq->request_capability_rsp.
 					       number), name);
-		release_sub_crqs(adapter);
 		*req_value = be64_to_cpu(crq->request_capability_rsp.number);
 		ibmvnic_send_req_caps(adapter, 1);
 		return;

From ed254971edea92c3ac5c67c6a05247a92aa6075e Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 1 Aug 2017 13:22:32 -0700
Subject: [PATCH 121/281] tcp: avoid setting cwnd to invalid ssthresh after
 cwnd reduction states

If the sender switches the congestion control during ECN-triggered
cwnd-reduction state (CA_CWR), upon exiting recovery cwnd is set to
the ssthresh value calculated by the previous congestion control. If
the previous congestion control is BBR that always keep ssthresh
to TCP_INIFINITE_SSTHRESH, cwnd ends up being infinite. The safe
step is to avoid assigning invalid ssthresh value when recovery ends.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..dad026fcfd09 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2520,8 +2520,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
 		return;
 
 	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
-	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
 		tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}

From 42ef3bedf30020a3b5298c2b918ccd7b6e1794d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20T=C3=A9nart?= <antoine.tenart@free-electrons.com>
Date: Wed, 19 Jul 2017 11:02:30 +0200
Subject: [PATCH 122/281] crypto: inside-secure - fix invalidation check in
 hmac_sha1_setkey

The safexcel_hmac_sha1_setkey function checks if an invalidation command
should be issued, i.e. when the context ipad/opad change. This checks is
done after filling the ipad/opad which and it can't be true. The patch
fixes this by moving the check before the ipad/opad memcpy operations.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel_hash.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 8527a5899a2f..a11b2edb41b9 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -883,9 +883,6 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 	if (ret)
 		return ret;
 
-	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
-	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
-
 	for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
 		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
 		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
@@ -894,6 +891,9 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 		}
 	}
 
+	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
+	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+
 	return 0;
 }
 

From 60c4081ec4195389f4fa1fce83eaad5e4b948748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antoine=20T=C3=A9nart?= <antoine.tenart@free-electrons.com>
Date: Wed, 19 Jul 2017 11:02:31 +0200
Subject: [PATCH 123/281] crypto: inside-secure - fix the sha state length in
 hmac_sha1_setkey

A check is performed on the ipad/opad in the safexcel_hmac_sha1_setkey
function, but the index used by the loop doing it is wrong. It is
currently the size of the state array while it should be the size of a
sha1 state. This patch fixes it.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel_hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index a11b2edb41b9..3f819399cd95 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -883,7 +883,7 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 	if (ret)
 		return ret;
 
-	for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
+	for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
 		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
 		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
 			ctx->base.needs_inv = true;

From 2d80bd3f7eb69204cd5dec4fa7fe7e12cbfaed13 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Jul 2017 15:58:39 +0300
Subject: [PATCH 124/281] pinctrl: cherryview: Add Setzer models to the
 Chromebook DMI quirk

Add one more model to the Chromebook DMI quirk to make it working again.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945
Fixes: 2a8209fa6823 ("pinctrl: cherryview: Extend the Chromebook DMI quirk to Intel_Strago systems")
Reported-by: mail@abhishek.geek.nz
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-cherryview.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 20f1b4493994..04e929fd0ffe 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1547,6 +1547,13 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
 			DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
 		},
 	},
+	{
+		.ident = "HP Chromebook 11 G5 (Setzer)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
+		},
+	},
 	{
 		.ident = "Acer Chromebook R11 (Cyan)",
 		.matches = {

From 40d829fb2ec636b6b4b0cc95e2546ab9aca04cc9 Mon Sep 17 00:00:00 2001
From: Manu Gautam <mgautam@codeaurora.org>
Date: Wed, 19 Jul 2017 17:07:10 +0530
Subject: [PATCH 125/281] usb: dwc3: gadget: Correct ISOC DATA PIDs for short
 packets

The PIDs for Isochronous data transfers are incorrect
for high bandwidth IN endpoints when the request length
is less than EP wMaxPacketSize.

As per spec correct PIDs for ISOC data transfers are:

1) For request length <= maxpacket
	- DATA0,

2) For maxpacket < length <= (2 * maxpacket)
	- DATA1, DATA0

3) For (2 * maxpacket) <  length <= (3 * maxpacket)
	- DATA2, DATA1, DATA0.

But driver always sets PCM fields based on wMaxPacketSize
due to which DATA2 happens even for small requests.

Fix this by setting the PCM field of trb->size depending
on request length rather than fixing it to the value
depending on wMaxPacketSize.

Ideally it shouldn't give any issues as dwc3 will send
0-length packet for next IN token if host sends (even
after receiving a short packet). Windows seems to ignore
this but with MacOS frame loss observed when using f_uvc.

Signed-off-by: Manu Gautam <mgautam@codeaurora.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 6b299c7b7656..f064f1549333 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -896,9 +896,40 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
 		if (!node) {
 			trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS_FIRST;
 
+			/*
+			 * USB Specification 2.0 Section 5.9.2 states that: "If
+			 * there is only a single transaction in the microframe,
+			 * only a DATA0 data packet PID is used.  If there are
+			 * two transactions per microframe, DATA1 is used for
+			 * the first transaction data packet and DATA0 is used
+			 * for the second transaction data packet.  If there are
+			 * three transactions per microframe, DATA2 is used for
+			 * the first transaction data packet, DATA1 is used for
+			 * the second, and DATA0 is used for the third."
+			 *
+			 * IOW, we should satisfy the following cases:
+			 *
+			 * 1) length <= maxpacket
+			 *	- DATA0
+			 *
+			 * 2) maxpacket < length <= (2 * maxpacket)
+			 *	- DATA1, DATA0
+			 *
+			 * 3) (2 * maxpacket) < length <= (3 * maxpacket)
+			 *	- DATA2, DATA1, DATA0
+			 */
 			if (speed == USB_SPEED_HIGH) {
 				struct usb_ep *ep = &dep->endpoint;
-				trb->size |= DWC3_TRB_SIZE_PCM1(ep->mult - 1);
+				unsigned int mult = ep->mult - 1;
+				unsigned int maxp = usb_endpoint_maxp(ep->desc);
+
+				if (length <= (2 * maxp))
+					mult--;
+
+				if (length <= maxp)
+					mult--;
+
+				trb->size |= DWC3_TRB_SIZE_PCM1(mult);
 			}
 		} else {
 			trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS;

From aca5b9ebd096039657417c321a9252c696b359c2 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 2 Aug 2017 21:06:35 +0900
Subject: [PATCH 126/281] usb: gadget: udc: renesas_usb3: Fix
 usb_gadget_giveback_request() calling

According to the gadget.h, a "complete" function will always be called
with interrupts disabled. However, sometimes usb3_request_done() function
is called with interrupts enabled. So, this function should be held
by spin_lock_irqsave() to disable interruption. Also, this driver has
to call spin_unlock() to avoid spinlock recursion by this driver before
calling usb_gadget_giveback_request().

Reported-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Tested-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller")
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 62dc9c7798e7..e1de8fe599a3 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -838,21 +838,32 @@ static struct renesas_usb3_request *usb3_get_request(struct renesas_usb3_ep
 	return usb3_req;
 }
 
+static void __usb3_request_done(struct renesas_usb3_ep *usb3_ep,
+				struct renesas_usb3_request *usb3_req,
+				int status)
+{
+	struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep);
+
+	dev_dbg(usb3_to_dev(usb3), "giveback: ep%2d, %u, %u, %d\n",
+		usb3_ep->num, usb3_req->req.length, usb3_req->req.actual,
+		status);
+	usb3_req->req.status = status;
+	usb3_ep->started = false;
+	list_del_init(&usb3_req->queue);
+	spin_unlock(&usb3->lock);
+	usb_gadget_giveback_request(&usb3_ep->ep, &usb3_req->req);
+	spin_lock(&usb3->lock);
+}
+
 static void usb3_request_done(struct renesas_usb3_ep *usb3_ep,
 			      struct renesas_usb3_request *usb3_req, int status)
 {
 	struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep);
 	unsigned long flags;
 
-	dev_dbg(usb3_to_dev(usb3), "giveback: ep%2d, %u, %u, %d\n",
-		usb3_ep->num, usb3_req->req.length, usb3_req->req.actual,
-		status);
-	usb3_req->req.status = status;
 	spin_lock_irqsave(&usb3->lock, flags);
-	usb3_ep->started = false;
-	list_del_init(&usb3_req->queue);
+	__usb3_request_done(usb3_ep, usb3_req, status);
 	spin_unlock_irqrestore(&usb3->lock, flags);
-	usb_gadget_giveback_request(&usb3_ep->ep, &usb3_req->req);
 }
 
 static void usb3_irq_epc_pipe0_status_end(struct renesas_usb3 *usb3)

From 5a8141bd41f0e7f7758956e2340e10cdf5f2b0b9 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Wed, 2 Aug 2017 13:15:42 +0530
Subject: [PATCH 127/281] usb: phy: phy-msm-usb: Fix usage of
 devm_regulator_bulk_get()

The regulator_bulk_data pointer passed to devm_regulator_bulk_get()
is used to store the client handles for the regulators, which
is later used by devm_regulator_bulk_release() to free the
regulators.
Passing a local array as is done here means the memory used to
store the handles is freed causing the handles to be corrupted,
resulting in a crash when devm_regulator_bulk_release() tries to
free them.

Fix this my moving the array inside of the msm_otg structure.

Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/phy/phy-msm-usb.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 8fb86a5f458e..3d0dd2f97415 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -197,6 +197,7 @@ struct msm_otg {
 	struct regulator *v3p3;
 	struct regulator *v1p8;
 	struct regulator *vddcx;
+	struct regulator_bulk_data supplies[3];
 
 	struct reset_control *phy_rst;
 	struct reset_control *link_rst;
@@ -1731,7 +1732,6 @@ static int msm_otg_reboot_notify(struct notifier_block *this,
 
 static int msm_otg_probe(struct platform_device *pdev)
 {
-	struct regulator_bulk_data regs[3];
 	int ret = 0;
 	struct device_node *np = pdev->dev.of_node;
 	struct msm_otg_platform_data *pdata;
@@ -1817,17 +1817,18 @@ static int msm_otg_probe(struct platform_device *pdev)
 		return motg->irq;
 	}
 
-	regs[0].supply = "vddcx";
-	regs[1].supply = "v3p3";
-	regs[2].supply = "v1p8";
+	motg->supplies[0].supply = "vddcx";
+	motg->supplies[1].supply = "v3p3";
+	motg->supplies[2].supply = "v1p8";
 
-	ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs);
+	ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(motg->supplies),
+				      motg->supplies);
 	if (ret)
 		return ret;
 
-	motg->vddcx = regs[0].consumer;
-	motg->v3p3  = regs[1].consumer;
-	motg->v1p8  = regs[2].consumer;
+	motg->vddcx = motg->supplies[0].consumer;
+	motg->v3p3  = motg->supplies[1].consumer;
+	motg->v1p8  = motg->supplies[2].consumer;
 
 	clk_set_rate(motg->clk, 60000000);
 

From 2acecd58969897795cf015c9057ebd349a3fda8a Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 2 Aug 2017 13:21:45 +0900
Subject: [PATCH 128/281] usb: renesas_usbhs: Fix UGCTRL2 value for R-Car Gen3

The latest HW manual (Rev.0.55) shows us this UGCTRL2.VBUSSEL bit.
If the bit sets to 1, the VBUS drive is controlled by phy related
registers (called "UCOM Registers" on the manual). Since R-Car Gen3
environment will control VBUS by phy-rcar-gen3-usb2 driver,
the UGCTRL2.VBUSSEL bit should be set to 1. So, this patch fixes
the register's value. Otherwise, even if the ID pin indicates to
peripheral, the R-Car will output USBn_PWEN to 1 when a host driver
is running.

Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control"
Cc: <stable@vger.kernel.org> # v4.6+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/renesas_usbhs/rcar3.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c
index d544b331c9f2..02b67abfc2a1 100644
--- a/drivers/usb/renesas_usbhs/rcar3.c
+++ b/drivers/usb/renesas_usbhs/rcar3.c
@@ -20,9 +20,13 @@
 /* Low Power Status register (LPSTS) */
 #define LPSTS_SUSPM	0x4000
 
-/* USB General control register 2 (UGCTRL2), bit[31:6] should be 0 */
+/*
+ * USB General control register 2 (UGCTRL2)
+ * Remarks: bit[31:11] and bit[9:6] should be 0
+ */
 #define UGCTRL2_RESERVED_3	0x00000001	/* bit[3:0] should be B'0001 */
 #define UGCTRL2_USB0SEL_OTG	0x00000030
+#define UGCTRL2_VBUSSEL		0x00000400
 
 static void usbhs_write32(struct usbhs_priv *priv, u32 reg, u32 data)
 {
@@ -34,7 +38,8 @@ static int usbhs_rcar3_power_ctrl(struct platform_device *pdev,
 {
 	struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev);
 
-	usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG);
+	usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG |
+		      UGCTRL2_VBUSSEL);
 
 	if (enable) {
 		usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM);

From b7d44c36a6f6d956e1539e0dd42f98b26e5a4684 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 28 Jul 2017 19:28:57 +0900
Subject: [PATCH 129/281] usb: renesas_usbhs: gadget: fix
 unused-but-set-variable warning

The commit b8b9c974afee ("usb: renesas_usbhs: gadget: disable all eps
when the driver stops") causes the unused-but-set-variable warning.
But, if the usbhsg_ep_disable() will return non-zero value, udc/core.c
doesn't clear the ep->enabled flag. So, this driver should not return
non-zero value, if the pipe is zero because this means the pipe is
already disabled. Otherwise, the ep->enabled flag is never cleared
when the usbhsg_ep_disable() is called by the renesas_usbhs driver first.

Fixes: b8b9c974afee ("usb: renesas_usbhs: gadget: disable all eps when the driver stops")
Fixes: 11432050f070 ("usb: renesas_usbhs: gadget: fix NULL pointer dereference in ep_disable()")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/renesas_usbhs/mod_gadget.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 93fba9033b00..2c8161bcf5b5 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -639,14 +639,11 @@ static int usbhsg_ep_disable(struct usb_ep *ep)
 	struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep);
 	struct usbhs_pipe *pipe;
 	unsigned long flags;
-	int ret = 0;
 
 	spin_lock_irqsave(&uep->lock, flags);
 	pipe = usbhsg_uep_to_pipe(uep);
-	if (!pipe) {
-		ret = -EINVAL;
+	if (!pipe)
 		goto out;
-	}
 
 	usbhsg_pipe_disable(uep);
 	usbhs_pipe_free(pipe);

From 61c12b49e1c9c77d7a1bcc161de540d0fd21cf0c Mon Sep 17 00:00:00 2001
From: Ashish Samant <ashish.samant@oracle.com>
Date: Wed, 12 Jul 2017 19:26:58 -0700
Subject: [PATCH 130/281] fuse: Dont call set_page_dirty_lock() for ITER_BVEC
 pages for async_dio

Commit 8fba54aebbdf ("fuse: direct-io: don't dirty ITER_BVEC pages") fixes
the ITER_BVEC page deadlock for direct io in fuse by checking in
fuse_direct_io(), whether the page is a bvec page or not, before locking
it.  However, this check is missed when the "async_dio" mount option is
enabled.  In this case, set_page_dirty_lock() is called from the req->end
callback in request_end(), when the fuse thread is returning from userspace
to respond to the read request.  This will cause the same deadlock because
the bvec condition is not checked in this path.

Here is the stack of the deadlocked thread, while returning from userspace:

[13706.656686] INFO: task glusterfs:3006 blocked for more than 120 seconds.
[13706.657808] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[13706.658788] glusterfs       D ffffffff816c80f0     0  3006      1
0x00000080
[13706.658797]  ffff8800d6713a58 0000000000000086 ffff8800d9ad7000
ffff8800d9ad5400
[13706.658799]  ffff88011ffd5cc0 ffff8800d6710008 ffff88011fd176c0
7fffffffffffffff
[13706.658801]  0000000000000002 ffffffff816c80f0 ffff8800d6713a78
ffffffff816c790e
[13706.658803] Call Trace:
[13706.658809]  [<ffffffff816c80f0>] ? bit_wait_io_timeout+0x80/0x80
[13706.658811]  [<ffffffff816c790e>] schedule+0x3e/0x90
[13706.658813]  [<ffffffff816ca7e5>] schedule_timeout+0x1b5/0x210
[13706.658816]  [<ffffffff81073ffb>] ? gup_pud_range+0x1db/0x1f0
[13706.658817]  [<ffffffff810668fe>] ? kvm_clock_read+0x1e/0x20
[13706.658819]  [<ffffffff81066909>] ? kvm_clock_get_cycles+0x9/0x10
[13706.658822]  [<ffffffff810f5792>] ? ktime_get+0x52/0xc0
[13706.658824]  [<ffffffff816c6f04>] io_schedule_timeout+0xa4/0x110
[13706.658826]  [<ffffffff816c8126>] bit_wait_io+0x36/0x50
[13706.658828]  [<ffffffff816c7d06>] __wait_on_bit_lock+0x76/0xb0
[13706.658831]  [<ffffffffa0545636>] ? lock_request+0x46/0x70 [fuse]
[13706.658834]  [<ffffffff8118800a>] __lock_page+0xaa/0xb0
[13706.658836]  [<ffffffff810c8500>] ? wake_atomic_t_function+0x40/0x40
[13706.658838]  [<ffffffff81194d08>] set_page_dirty_lock+0x58/0x60
[13706.658841]  [<ffffffffa054d968>] fuse_release_user_pages+0x58/0x70 [fuse]
[13706.658844]  [<ffffffffa0551430>] ? fuse_aio_complete+0x190/0x190 [fuse]
[13706.658847]  [<ffffffffa0551459>] fuse_aio_complete_req+0x29/0x90 [fuse]
[13706.658849]  [<ffffffffa05471e9>] request_end+0xd9/0x190 [fuse]
[13706.658852]  [<ffffffffa0549126>] fuse_dev_do_write+0x336/0x490 [fuse]
[13706.658854]  [<ffffffffa054963e>] fuse_dev_write+0x6e/0xa0 [fuse]
[13706.658857]  [<ffffffff812a9ef3>] ? security_file_permission+0x23/0x90
[13706.658859]  [<ffffffff81205300>] do_iter_readv_writev+0x60/0x90
[13706.658862]  [<ffffffffa05495d0>] ? fuse_dev_splice_write+0x350/0x350
[fuse]
[13706.658863]  [<ffffffff812062a1>] do_readv_writev+0x171/0x1f0
[13706.658866]  [<ffffffff810b3d00>] ? try_to_wake_up+0x210/0x210
[13706.658868]  [<ffffffff81206361>] vfs_writev+0x41/0x50
[13706.658870]  [<ffffffff81206496>] SyS_writev+0x56/0xf0
[13706.658872]  [<ffffffff810257a1>] ? syscall_trace_leave+0xf1/0x160
[13706.658874]  [<ffffffff816cbb2e>] system_call_fastpath+0x12/0x71

Fix this by making should_dirty a fuse_io_priv parameter that can be
checked in fuse_aio_complete_req().

Reported-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Ashish Samant <ashish.samant@oracle.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c   | 6 +++---
 fs/fuse/fuse_i.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 76eac2a554c4..810ed4f99e38 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -609,7 +609,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_io_priv *io = req->io;
 	ssize_t pos = -1;
 
-	fuse_release_user_pages(req, !io->write);
+	fuse_release_user_pages(req, io->should_dirty);
 
 	if (io->write) {
 		if (req->misc.write.in.size != req->misc.write.out.size)
@@ -1316,7 +1316,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		       loff_t *ppos, int flags)
 {
 	int write = flags & FUSE_DIO_WRITE;
-	bool should_dirty = !write && iter_is_iovec(iter);
 	int cuse = flags & FUSE_DIO_CUSE;
 	struct file *file = io->file;
 	struct inode *inode = file->f_mapping->host;
@@ -1346,6 +1345,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 			inode_unlock(inode);
 	}
 
+	io->should_dirty = !write && iter_is_iovec(iter);
 	while (count) {
 		size_t nres;
 		fl_owner_t owner = current->files;
@@ -1360,7 +1360,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 			nres = fuse_send_read(req, io, pos, nbytes, owner);
 
 		if (!io->async)
-			fuse_release_user_pages(req, should_dirty);
+			fuse_release_user_pages(req, io->should_dirty);
 		if (req->out.h.error) {
 			err = req->out.h.error;
 			break;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1bd7ffdad593..bd4d2a3e1ec1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -249,6 +249,7 @@ struct fuse_io_priv {
 	size_t size;
 	__u64 offset;
 	bool write;
+	bool should_dirty;
 	int err;
 	struct kiocb *iocb;
 	struct file *file;

From 2dda640040876cd8ae646408b69eea40c24f9ae9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Aug 2017 23:10:46 -0700
Subject: [PATCH 131/281] net: fix keepalive code vs TCP_FASTOPEN_CONNECT

syzkaller was able to trigger a divide by 0 in TCP stack [1]

Issue here is that keepalive timer needs to be updated to not attempt
to send a probe if the connection setup was deferred using
TCP_FASTOPEN_CONNECT socket option added in linux-4.11

[1]
 divide error: 0000 [#1] SMP
 CPU: 18 PID: 0 Comm: swapper/18 Not tainted
 task: ffff986f62f4b040 ti: ffff986f62fa2000 task.ti: ffff986f62fa2000
 RIP: 0010:[<ffffffff8409cc0d>]  [<ffffffff8409cc0d>] __tcp_select_window+0x8d/0x160
 Call Trace:
  <IRQ>
  [<ffffffff8409d951>] tcp_transmit_skb+0x11/0x20
  [<ffffffff8409da21>] tcp_xmit_probe_skb+0xc1/0xe0
  [<ffffffff840a0ee8>] tcp_write_wakeup+0x68/0x160
  [<ffffffff840a151b>] tcp_keepalive_timer+0x17b/0x230
  [<ffffffff83b3f799>] call_timer_fn+0x39/0xf0
  [<ffffffff83b40797>] run_timer_softirq+0x1d7/0x280
  [<ffffffff83a04ddb>] __do_softirq+0xcb/0x257
  [<ffffffff83ae03ac>] irq_exit+0x9c/0xb0
  [<ffffffff83a04c1a>] smp_apic_timer_interrupt+0x6a/0x80
  [<ffffffff83a03eaf>] apic_timer_interrupt+0x7f/0x90
  <EOI>
  [<ffffffff83fed2ea>] ? cpuidle_enter_state+0x13a/0x3b0
  [<ffffffff83fed2cd>] ? cpuidle_enter_state+0x11d/0x3b0

Tested:

Following packetdrill no longer crashes the kernel

`echo 0 >/proc/sys/net/ipv4/tcp_timestamps`

// Cache warmup: send a Fast Open cookie request
    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
   +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
   +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
 +.01 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
   +0 > . 1:1(0) ack 1
   +0 close(3) = 0
   +0 > F. 1:1(0) ack 1
   +0 < F. 1:1(0) ack 2 win 92
   +0 > .  2:2(0) ack 2

   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
   +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
   +0 setsockopt(4, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
 +.01 connect(4, ..., ...) = 0
   +0 setsockopt(4, SOL_TCP, TCP_KEEPIDLE, [5], 4) = 0
   +10 close(4) = 0

`echo 1 >/proc/sys/net/ipv4/tcp_timestamps`

Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeeef962a..e906014890b6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -652,7 +652,8 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
 		goto out;
 
 	elapsed = keepalive_time_when(tp);

From b91d532928dff2141ea9c107c3e73104d9843767 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 3 Aug 2017 14:13:46 +0800
Subject: [PATCH 132/281] ipv6: set rt6i_protocol properly in the route when it
 is installed

After commit c2ed1880fd61 ("net: ipv6: check route protocol when
deleting routes"), ipv6 route checks rt protocol when trying to
remove a rt entry.

It introduced a side effect causing 'ip -6 route flush cache' not
to work well. When flushing caches with iproute, all route caches
get dumped from kernel then removed one by one by sending DELROUTE
requests to kernel for each cache.

The thing is iproute sends the request with the cache whose proto
is set with RTPROT_REDIRECT by rt6_fill_node() when kernel dumps
it. But in kernel the rt_cache protocol is still 0, which causes
the cache not to be matched and removed.

So the real reason is rt6i_protocol in the route is not set when
it is allocated. As David Ahern's suggestion, this patch is to
set rt6i_protocol properly in the route when it is installed and
remove the codes setting rtm_protocol according to rt6i_flags in
rt6_fill_node.

This is also an improvement to keep rt6i_protocol consistent with
rtm_protocol.

Fixes: c2ed1880fd61 ("net: ipv6: check route protocol when deleting routes")
Reported-by: Jianlin Shi <jishi@redhat.com>
Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..a640fbcba15d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2351,6 +2351,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
+	nrt->rt6i_protocol = RTPROT_REDIRECT;
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
 	if (ip6_ins_rt(nrt))
@@ -2461,6 +2462,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_dst_len	= prefixlen,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
 				  RTF_UP | RTF_PREF(pref),
+		.fc_protocol = RTPROT_RA,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = net,
@@ -2513,6 +2515,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+		.fc_protocol = RTPROT_RA,
 		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = dev_net(dev),
@@ -3424,14 +3427,6 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_flags = 0;
 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 	rtm->rtm_protocol = rt->rt6i_protocol;
-	if (rt->rt6i_flags & RTF_DYNAMIC)
-		rtm->rtm_protocol = RTPROT_REDIRECT;
-	else if (rt->rt6i_flags & RTF_ADDRCONF) {
-		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
-			rtm->rtm_protocol = RTPROT_RA;
-		else
-			rtm->rtm_protocol = RTPROT_KERNEL;
-	}
 
 	if (rt->rt6i_flags & RTF_CACHE)
 		rtm->rtm_flags |= RTM_F_CLONED;

From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 3 Aug 2017 09:19:52 -0400
Subject: [PATCH 133/281] tcp: introduce tcp_rto_delta_us() helper for xmit
 timer fix

Pure refactor. This helper will be required in the xmit timer fix
later in the patch series. (Because the TLP logic will want to make
this calculation.)

Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 10 ++++++++++
 net/ipv4/tcp_input.c |  5 +----
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..ada65e767b28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			     u64 xmit_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
 
+/* At how many usecs into the future should the RTO fire? */
+static inline s64 tcp_rto_delta_us(const struct sock *sk)
+{
+	const struct sk_buff *skb = tcp_write_queue_head(sk);
+	u32 rto = inet_csk(sk)->icsk_rto;
+	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+
+	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
+}
+
 /*
  * Save and compile IPv4 options, return a pointer to it
  */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dad026fcfd09..b9ba8d55d8b8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3004,10 +3004,7 @@ void tcp_rearm_rto(struct sock *sk)
 		/* Offset the time elapsed after installing regular RTO */
 		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
-			struct sk_buff *skb = tcp_write_queue_head(sk);
-			u64 rto_time_stamp = skb->skb_mstamp +
-					     jiffies_to_usecs(rto);
-			s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+			s64 delta_us = tcp_rto_delta_us(sk);
 			/* delta_us may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */

From a2815817ffa68c7933a43eb55836d6e789bd4389 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 3 Aug 2017 09:19:53 -0400
Subject: [PATCH 134/281] tcp: enable xmit timer fix by having TLP use time
 when RTO should fire

Have tcp_schedule_loss_probe() base the TLP scheduling decision based
on when the RTO *should* fire. This is to enable the upcoming xmit
timer fix in this series, where tcp_schedule_loss_probe() cannot
assume that the last timer installed was an RTO timer (because we are
no longer doing the "rearm RTO, rearm RTO, rearm TLP" dance on every
ACK). So tcp_schedule_loss_probe() must independently figure out when
an RTO would want to fire.

In the new TLP implementation following in this series, we cannot
assume that icsk_timeout was set based on an RTO; after processing a
cumulative ACK the icsk_timeout we see can be from a previous TLP or
RTO. So we need to independently recalculate the RTO time (instead of
reading it out of icsk_timeout). Removing this dependency on the
nature of icsk_timeout makes things a little easier to reason about
anyway.

Note that the old and new code should be equivalent, since they are
both saying: "if the RTO is in the future, but at an earlier time than
the normal TLP time, then set the TLP timer to fire when the RTO would
have fired".

Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f1588bf73da..cd8e257492c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,8 +2377,8 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 timeout, tlp_time_stamp, rto_time_stamp;
 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+	u32 timeout, rto_delta_us;
 
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2417,14 +2417,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
 	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
 
-	/* If RTO is shorter, just schedule TLP in its place. */
-	tlp_time_stamp = tcp_jiffies32 + timeout;
-	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
-	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
-		s32 delta = rto_time_stamp - tcp_jiffies32;
-		if (delta > 0)
-			timeout = delta;
-	}
+	/* If the RTO formula yields an earlier time, then use that time. */
+	rto_delta_us = tcp_rto_delta_us(sk);  /* How far in future is RTO? */
+	if (rto_delta_us > 0)
+		timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
 
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
 				  TCP_RTO_MAX);

From df92c8394e6ea0469e8056946ef8add740ab8046 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 3 Aug 2017 09:19:54 -0400
Subject: [PATCH 135/281] tcp: fix xmit timer to only be reset if data
 ACKed/SACKed

Fix a TCP loss recovery performance bug raised recently on the netdev
list, in two threads:

(i)  July 26, 2017: netdev thread "TCP fast retransmit issues"
(ii) July 26, 2017: netdev thread:
     "[PATCH V2 net-next] TLP: Don't reschedule PTO when there's one
     outstanding TLP retransmission"

The basic problem is that incoming TCP packets that did not indicate
forward progress could cause the xmit timer (TLP or RTO) to be rearmed
and pushed back in time. In certain corner cases this could result in
the following problems noted in these threads:

 - Repeated ACKs coming in with bogus SACKs corrupted by middleboxes
   could cause TCP to repeatedly schedule TLPs forever. We kept
   sending TLPs after every ~200ms, which elicited bogus SACKs, which
   caused more TLPs, ad infinitum; we never fired an RTO to fill in
   the holes.

 - Incoming data segments could, in some cases, cause us to reschedule
   our RTO or TLP timer further out in time, for no good reason. This
   could cause repeated inbound data to result in stalls in outbound
   data, in the presence of packet loss.

This commit fixes these bugs by changing the TLP and RTO ACK
processing to:

 (a) Only reschedule the xmit timer once per ACK.

 (b) Only reschedule the xmit timer if tcp_clean_rtx_queue() deems the
     ACK indicates sufficient forward progress (a packet was
     cumulatively ACKed, or we got a SACK for a packet that was sent
     before the most recent retransmit of the write queue head).

This brings us back into closer compliance with the RFCs, since, as
the comment for tcp_rearm_rto() notes, we should only restart the RTO
timer after forward progress on the connection. Previously we were
restarting the xmit timer even in these cases where there was no
forward progress.

As a side benefit, this commit simplifies and speeds up the TCP timer
arming logic. We had been calling inet_csk_reset_xmit_timer() three
times on normal ACKs that cumulatively acknowledged some data:

1) Once near the top of tcp_ack() to switch from TLP timer to RTO:
        if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
               tcp_rearm_rto(sk);

2) Once in tcp_clean_rtx_queue(), to update the RTO:
        if (flag & FLAG_ACKED) {
               tcp_rearm_rto(sk);

3) Once in tcp_ack() after tcp_fastretrans_alert() to switch from RTO
   to TLP:
        if (icsk->icsk_pending == ICSK_TIME_RETRANS)
               tcp_schedule_loss_probe(sk);

This commit, by only rescheduling the xmit timer once per ACK,
simplifies the code and reduces CPU overhead.

This commit was tested in an A/B test with Google web server
traffic. SNMP stats and request latency metrics were within noise
levels, substantiating that for normal web traffic patterns this is a
rare issue. This commit was also tested with packetdrill tests to
verify that it fixes the timer behavior in the corner cases discussed
in the netdev threads mentioned above.

This patch is a bug fix patch intended to be queued for -stable
relases.

Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
Reported-by: Klavs Klavsen <kl@vsen.dk>
Reported-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 25 ++++++++++++++++---------
 net/ipv4/tcp_output.c |  9 ---------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b9ba8d55d8b8..53de1424c13c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
+#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
 #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
@@ -3016,6 +3017,13 @@ void tcp_rearm_rto(struct sock *sk)
 	}
 }
 
+/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+static void tcp_set_xmit_timer(struct sock *sk)
+{
+	if (!tcp_schedule_loss_probe(sk))
+		tcp_rearm_rto(sk);
+}
+
 /* If we get here, the whole TSO packet has not been acked. */
 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 {
@@ -3177,7 +3185,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 					ca_rtt_us, sack->rate);
 
 	if (flag & FLAG_ACKED) {
-		tcp_rearm_rto(sk);
+		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
 			tcp_mtup_probe_success(sk);
@@ -3205,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
 		 */
-		tcp_rearm_rto(sk);
+		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
 	}
 
 	if (icsk->icsk_ca_ops->pkts_acked) {
@@ -3577,9 +3585,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, tp->snd_nxt))
 		goto invalid_ack;
 
-	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
-		tcp_rearm_rto(sk);
-
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
 		icsk->icsk_retransmits = 0;
@@ -3644,18 +3649,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
 				    &sack_state);
 
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
+	/* If needed, reset TLP/RTO timer; RACK may later override this. */
+	if (flag & FLAG_SET_XMIT_TIMER)
+		tcp_set_xmit_timer(sk);
+
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
 	}
-	if (tp->tlp_high_seq)
-		tcp_process_tlp_ack(sk, ack, flag);
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		sk_dst_confirm(sk);
 
-	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
-		tcp_schedule_loss_probe(sk);
 	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
 	lost = tp->lost - lost;			/* freshly marked lost */
 	tcp_rate_gen(sk, delivered, lost, sack_state.rate);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cd8e257492c4..276406a83a37 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2380,21 +2380,12 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 	u32 timeout, rto_delta_us;
 
-	/* No consecutive loss probes. */
-	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
-		tcp_rearm_rto(sk);
-		return false;
-	}
 	/* Don't do any loss probe on a Fast Open connection before 3WHS
 	 * finishes.
 	 */
 	if (tp->fastopen_rsk)
 		return false;
 
-	/* TLP is only scheduled when next timer event is RTO. */
-	if (icsk->icsk_pending != ICSK_TIME_RETRANS)
-		return false;
-
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */

From da6c9bbf418dcb0f8be261f4353400fa959b1e92 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:42 +0800
Subject: [PATCH 136/281] drm/rockchip: vop: fix iommu page fault when resume

Iommu would get page fault with following path:
   vop_disable:
      1, disable all windows and set vop config done
      2, vop enter to standy, all windows not works, but their registers
         are not clean, when you read window's enable bit, may found the
         window is enable.

   vop_enable:
      1, memcpy(vop->regsbak, vop->regs, len)
         save current vop registers to vop->regsbak, then you can found
         window is enable on regsbak.
      2, VOP_WIN_SET(vop, win, gate, 1);
         force enable window gate, but gate and enable are on same
         hardware register, then window enable bit rewrite to vop hardware.
      3, vop power on, and vop might try to scan destroyed buffer,
         then iommu get page fault.

Move windows disable after vop regsbak restore, then vop regsbak mechanism
would keep tracing the modify, everything would be safe.

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494582-6934-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 33 ++++++++++-----------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 5d450332c2fd..804d0ff53059 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -500,7 +500,7 @@ static void vop_line_flag_irq_disable(struct vop *vop)
 static int vop_enable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
-	int ret;
+	int ret, i;
 
 	ret = pm_runtime_get_sync(vop->dev);
 	if (ret < 0) {
@@ -533,6 +533,20 @@ static int vop_enable(struct drm_crtc *crtc)
 	}
 
 	memcpy(vop->regs, vop->regsbak, vop->len);
+	/*
+	 * We need to make sure that all windows are disabled before we
+	 * enable the crtc. Otherwise we might try to scan from a destroyed
+	 * buffer later.
+	 */
+	for (i = 0; i < vop->data->win_size; i++) {
+		struct vop_win *vop_win = &vop->win[i];
+		const struct vop_win_data *win = vop_win->data;
+
+		spin_lock(&vop->reg_lock);
+		VOP_WIN_SET(vop, win, enable, 0);
+		spin_unlock(&vop->reg_lock);
+	}
+
 	vop_cfg_done(vop);
 
 	/*
@@ -566,28 +580,11 @@ err_put_pm_runtime:
 static void vop_crtc_disable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
-	int i;
 
 	WARN_ON(vop->event);
 
 	rockchip_drm_psr_deactivate(&vop->crtc);
 
-	/*
-	 * We need to make sure that all windows are disabled before we
-	 * disable that crtc. Otherwise we might try to scan from a destroyed
-	 * buffer later.
-	 */
-	for (i = 0; i < vop->data->win_size; i++) {
-		struct vop_win *vop_win = &vop->win[i];
-		const struct vop_win_data *win = vop_win->data;
-
-		spin_lock(&vop->reg_lock);
-		VOP_WIN_SET(vop, win, enable, 0);
-		spin_unlock(&vop->reg_lock);
-	}
-
-	vop_cfg_done(vop);
-
 	drm_crtc_vblank_off(crtc);
 
 	/*

From 6f04f5925ce763e07cb405d1fbe97a53b6c026a3 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:46 +0800
Subject: [PATCH 137/281] drm/rockchip: vop: fix NV12 video display error

fixup the scale calculation formula on the case
src_height == (dst_height/2).

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494586-6984-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 9979fd0c2282..27eefbfcf3d0 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -282,6 +282,9 @@ static inline uint16_t scl_get_bili_dn_vskip(int src_h, int dst_h,
 
 	act_height = (src_h + vskiplines - 1) / vskiplines;
 
+	if (act_height == dst_h)
+		return GET_SCL_FT_BILI_DN(src_h, dst_h) / vskiplines;
+
 	return GET_SCL_FT_BILI_DN(act_height, dst_h);
 }
 

From 79a0b149d4e3d45fc46673f4b29a157776c174e2 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:50 +0800
Subject: [PATCH 138/281] drm/rockchip: vop: round_up pitches to word align

VOP pitch register is word align, need align to word.

VOP_WIN0_VIR:
  bit[31:16] win0_vir_stride_uv
    Number of words of Win0 uv Virtual width
  bit[15:0] win0_vir_width
    Number of words of Win0 yrgb Virtual width
    ARGB888 : win0_vir_width
    RGB888 : (win0_vir_width*3/4) + (win0_vir_width%3)
    RGB565 : ceil(win0_vir_width/2)
    YUV : ceil(win0_vir_width/4)

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494591-7034-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 804d0ff53059..e205c5cf9019 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -761,7 +761,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 	spin_lock(&vop->reg_lock);
 
 	VOP_WIN_SET(vop, win, format, format);
-	VOP_WIN_SET(vop, win, yrgb_vir, fb->pitches[0] >> 2);
+	VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4));
 	VOP_WIN_SET(vop, win, yrgb_mst, dma_addr);
 	if (is_yuv_support(fb->format->format)) {
 		int hsub = drm_format_horz_chroma_subsampling(fb->format->format);
@@ -775,7 +775,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 		offset += (src->y1 >> 16) * fb->pitches[1] / vsub;
 
 		dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1];
-		VOP_WIN_SET(vop, win, uv_vir, fb->pitches[1] >> 2);
+		VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4));
 		VOP_WIN_SET(vop, win, uv_mst, dma_addr);
 	}
 

From 80c471ea040ad9006ebff6d64221a04e8fa1b7f6 Mon Sep 17 00:00:00 2001
From: Mark yao <mark.yao@rock-chips.com>
Date: Mon, 31 Jul 2017 17:49:55 +0800
Subject: [PATCH 139/281] drm/rockchip: vop: report error when check resource
 error

The user would be confused while facing a error commit without
any error report.

Signed-off-by: Mark Yao <mark.yao@rock-chips.com>
Reviewed-by: Sandy huang <sandy.huang@rock-chips.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1501494596-7090-1-git-send-email-mark.yao@rock-chips.com
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index e205c5cf9019..2900f1410d95 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -679,8 +679,10 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
 	 * Src.x1 can be odd when do clip, but yuv plane start point
 	 * need align with 2 pixel.
 	 */
-	if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2))
+	if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2)) {
+		DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
 		return -EINVAL;
+	}
 
 	return 0;
 }

From 02b6ed44304e458d68e454493ab272f4e3c3c53a Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Fri, 4 Aug 2017 17:39:41 +0800
Subject: [PATCH 140/281] drm/i915/gvt: Initialize MMIO Block with HW state

MMIO block with tracked mmio, is introduced for the sake of performance
of searching tracked mmio. All the tracked mmio needs to get the initial
value from the HW state during vGPU being created. This patch is to
initialize the tracked registers in MMIO block with the HW state.

v2: Add "Fixes:" line for this patch (Zhenyu)

Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks")
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/firmware.c | 11 ++++++++-
 drivers/gpu/drm/i915/gvt/gvt.h      | 12 ++++++++++
 drivers/gpu/drm/i915/gvt/handlers.c | 36 ++++++++++++++---------------
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 5dad9298b2d5..a26c1705430e 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -72,11 +72,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	struct intel_gvt_device_info *info = &gvt->device_info;
 	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
 	struct intel_gvt_mmio_info *e;
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	int num = gvt->mmio.num_mmio_block;
 	struct gvt_firmware_header *h;
 	void *firmware;
 	void *p;
 	unsigned long size, crc32_start;
-	int i;
+	int i, j;
 	int ret;
 
 	size = sizeof(*h) + info->mmio_size + info->cfg_space_size;
@@ -105,6 +107,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	hash_for_each(gvt->mmio.mmio_info_table, i, e, node)
 		*(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset));
 
+	for (i = 0; i < num; i++, block++) {
+		for (j = 0; j < block->size; j += 4)
+			*(u32 *)(p + INTEL_GVT_MMIO_OFFSET(block->offset) + j) =
+				I915_READ_NOTRACE(_MMIO(INTEL_GVT_MMIO_OFFSET(
+							block->offset) + j));
+	}
+
 	memcpy(gvt->firmware.mmio, p, info->mmio_size);
 
 	crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index d96c41aa5aa7..2964a4d01a66 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -195,6 +195,15 @@ struct intel_gvt_fence {
 	unsigned long vgpu_allocated_fence_num;
 };
 
+/* Special MMIO blocks. */
+struct gvt_mmio_block {
+	unsigned int device;
+	i915_reg_t   offset;
+	unsigned int size;
+	gvt_mmio_func read;
+	gvt_mmio_func write;
+};
+
 #define INTEL_GVT_MMIO_HASH_BITS 11
 
 struct intel_gvt_mmio {
@@ -214,6 +223,9 @@ struct intel_gvt_mmio {
 /* This reg could be accessed by unaligned address */
 #define F_UNALIGN	(1 << 6)
 
+	struct gvt_mmio_block *mmio_block;
+	unsigned int num_mmio_block;
+
 	DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS);
 	unsigned int num_tracked_mmio;
 };
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 17febe830ff6..323664a238f5 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2857,31 +2857,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	return 0;
 }
 
-/* Special MMIO blocks. */
-static struct gvt_mmio_block {
-	unsigned int device;
-	i915_reg_t   offset;
-	unsigned int size;
-	gvt_mmio_func read;
-	gvt_mmio_func write;
-} gvt_mmio_blocks[] = {
-	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
-	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
-	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
-		pvinfo_mmio_read, pvinfo_mmio_write},
-	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
-	{D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL},
-	{D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL},
-};
-
 static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt,
 					      unsigned int offset)
 {
 	unsigned long device = intel_gvt_get_device_type(gvt);
-	struct gvt_mmio_block *block = gvt_mmio_blocks;
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	int num = gvt->mmio.num_mmio_block;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(gvt_mmio_blocks); i++, block++) {
+	for (i = 0; i < num; i++, block++) {
 		if (!(device & block->device))
 			continue;
 		if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) &&
@@ -2912,6 +2896,17 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
 	gvt->mmio.mmio_attribute = NULL;
 }
 
+/* Special MMIO blocks. */
+static struct gvt_mmio_block mmio_blocks[] = {
+	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
+	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
+	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
+		pvinfo_mmio_read, pvinfo_mmio_write},
+	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
+	{D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL},
+	{D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL},
+};
+
 /**
  * intel_gvt_setup_mmio_info - setup MMIO information table for GVT device
  * @gvt: GVT device
@@ -2951,6 +2946,9 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 			goto err;
 	}
 
+	gvt->mmio.mmio_block = mmio_blocks;
+	gvt->mmio.num_mmio_block = ARRAY_SIZE(mmio_blocks);
+
 	gvt_dbg_mmio("traced %u virtual mmio registers\n",
 		     gvt->mmio.num_tracked_mmio);
 	return 0;

From 9e48cd4a77d5170433a2e611eeda7accdf96604d Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Mon, 24 Jul 2017 11:44:17 +0530
Subject: [PATCH 141/281] sparc64: properly name the cpu constants

Acked-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/spitfire.h | 14 ++++++++++++++
 arch/sparc/kernel/head_64.S       | 16 ++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 1d8321c827a8..9cc2afe10ef0 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -51,6 +51,20 @@
 #define SUN4V_CHIP_SPARC_SN	0x8b
 #define SUN4V_CHIP_UNKNOWN	0xff
 
+/*
+ * The following CPU_ID_xxx constants are used
+ * to identify the CPU type in the setup phase
+ * (see head_64.S)
+ */
+#define CPU_ID_NIAGARA1		('1')
+#define CPU_ID_NIAGARA2		('2')
+#define CPU_ID_NIAGARA3		('3')
+#define CPU_ID_NIAGARA4		('4')
+#define CPU_ID_NIAGARA5		('5')
+#define CPU_ID_M6		('6')
+#define CPU_ID_M7		('7')
+#define CPU_ID_SONOMA1		('N')
+
 #ifndef __ASSEMBLY__
 
 enum ultra_tlb_layout {
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 41a407328667..ddb5e24adf49 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -424,22 +424,22 @@ EXPORT_SYMBOL(sun4v_chip_type)
 	 nop
 
 70:	ldub	[%g1 + 7], %g2
-	cmp	%g2, '3'
+	cmp	%g2, CPU_ID_NIAGARA3
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA3, %g4
-	cmp	%g2, '4'
+	cmp	%g2, CPU_ID_NIAGARA4
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA4, %g4
-	cmp	%g2, '5'
+	cmp	%g2, CPU_ID_NIAGARA5
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA5, %g4
-	cmp	%g2, '6'
+	cmp	%g2, CPU_ID_M6
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M6, %g4
-	cmp	%g2, '7'
+	cmp	%g2, CPU_ID_M7
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M7, %g4
-	cmp	%g2, 'N'
+	cmp	%g2, CPU_ID_SONOMA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_SN, %g4
 	ba,pt	%xcc, 49f
@@ -448,10 +448,10 @@ EXPORT_SYMBOL(sun4v_chip_type)
 91:	sethi	%hi(prom_cpu_compatible), %g1
 	or	%g1, %lo(prom_cpu_compatible), %g1
 	ldub	[%g1 + 17], %g2
-	cmp	%g2, '1'
+	cmp	%g2, CPU_ID_NIAGARA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA1, %g4
-	cmp	%g2, '2'
+	cmp	%g2, CPU_ID_NIAGARA2
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA2, %g4
 	

From 7d484acb2f90643de7e242fd47e48c3ebb22df3a Mon Sep 17 00:00:00 2001
From: Allen Pais <allen.pais@oracle.com>
Date: Mon, 24 Jul 2017 11:44:18 +0530
Subject: [PATCH 142/281] sparc64: recognize and support sparc M8 cpu type

Recognize SPARC-M8 cpu type, hardware caps and cpu
distribution map.

Signed-off-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: David Aldridge <david.j.aldridge@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/spitfire.h |  2 ++
 arch/sparc/kernel/cpu.c           |  6 ++++++
 arch/sparc/kernel/cpumap.c        |  1 +
 arch/sparc/kernel/head_64.S       |  6 ++++++
 arch/sparc/kernel/setup_64.c      | 15 +++++++++++++--
 arch/sparc/mm/init_64.c           |  2 ++
 6 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 9cc2afe10ef0..1b1286d05069 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -47,6 +47,7 @@
 #define SUN4V_CHIP_NIAGARA5	0x05
 #define SUN4V_CHIP_SPARC_M6	0x06
 #define SUN4V_CHIP_SPARC_M7	0x07
+#define SUN4V_CHIP_SPARC_M8	0x08
 #define SUN4V_CHIP_SPARC64X	0x8a
 #define SUN4V_CHIP_SPARC_SN	0x8b
 #define SUN4V_CHIP_UNKNOWN	0xff
@@ -63,6 +64,7 @@
 #define CPU_ID_NIAGARA5		('5')
 #define CPU_ID_M6		('6')
 #define CPU_ID_M7		('7')
+#define CPU_ID_M8		('8')
 #define CPU_ID_SONOMA1		('N')
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 493e023a468a..ef4f18f7a674 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void)
 		sparc_pmu_type = "sparc-m7";
 		break;
 
+	case SUN4V_CHIP_SPARC_M8:
+		sparc_cpu_type = "SPARC-M8";
+		sparc_fpu_type = "SPARC-M8 integrated FPU";
+		sparc_pmu_type = "sparc-m8";
+		break;
+
 	case SUN4V_CHIP_SPARC_SN:
 		sparc_cpu_type = "SPARC-SN";
 		sparc_fpu_type = "SPARC-SN integrated FPU";
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 45c820e1cba5..90d550bbfeef 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
 	case SUN4V_CHIP_NIAGARA5:
 	case SUN4V_CHIP_SPARC_M6:
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 	case SUN4V_CHIP_SPARC64X:
 		rover_inc_table = niagara_iterate_method;
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index ddb5e24adf49..78e0211753d2 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -439,6 +439,9 @@ EXPORT_SYMBOL(sun4v_chip_type)
 	cmp	%g2, CPU_ID_M7
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_M7, %g4
+	cmp	%g2, CPU_ID_M8
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M8, %g4
 	cmp	%g2, CPU_ID_SONOMA1
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_SPARC_SN, %g4
@@ -600,6 +603,9 @@ niagara_tlb_fixup:
 	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_SPARC_M7
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M8
 	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_SPARC_SN
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 4d9c3e13c150..150ee7d4b059 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -288,10 +288,17 @@ static void __init sun4v_patch(void)
 
 	sun4v_patch_2insn_range(&__sun4v_2insn_patch,
 				&__sun4v_2insn_patch_end);
-	if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
-	    sun4v_chip_type == SUN4V_CHIP_SPARC_SN)
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
 		sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
 					 &__sun_m7_2insn_patch_end);
+		break;
+	default:
+		break;
+	}
 
 	sun4v_hvapi_init();
 }
@@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void)
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_BLKINIT;
@@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void)
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_N2;
@@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void)
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
@@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void)
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index fed73f14aa49..c24f4bfc3b84 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2161,6 +2161,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 	 */
 	switch (sun4v_chip_type) {
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 		pagecv_flag = 0x00;
 		break;
@@ -2313,6 +2314,7 @@ void __init paging_init(void)
 	 */
 	switch (sun4v_chip_type) {
 	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
 	case SUN4V_CHIP_SPARC_SN:
 		page_cache4v_flag = _PAGE_CP_4V;
 		break;

From fdaccf74fe83ab3438df014efd55788a1dd414b5 Mon Sep 17 00:00:00 2001
From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Fri, 28 Jul 2017 19:29:32 -0600
Subject: [PATCH 143/281] sparc64: Increase max_phys_bits to 51 and VA bits to
 53 for M8.

On M8 chips, use a max_phys_bits value of 51.

Also, M8 supports VA bits up to 54 bits. However, for now
restrict VA bits to 53 due to 4-level pagetable limitation.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init_64.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index c24f4bfc3b84..afa0099f3748 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1944,12 +1944,22 @@ static void __init setup_page_offset(void)
 			break;
 		case SUN4V_CHIP_SPARC_M7:
 		case SUN4V_CHIP_SPARC_SN:
-		default:
 			/* M7 and later support 52-bit virtual addresses.  */
 			sparc64_va_hole_top =    0xfff8000000000000UL;
 			sparc64_va_hole_bottom = 0x0008000000000000UL;
 			max_phys_bits = 49;
 			break;
+		case SUN4V_CHIP_SPARC_M8:
+		default:
+			/* M8 and later support 54-bit virtual addresses.
+			 * However, restricting M8 and above VA bits to 53
+			 * as 4-level page table cannot support more than
+			 * 53 VA bits.
+			 */
+			sparc64_va_hole_top =    0xfff0000000000000UL;
+			sparc64_va_hole_bottom = 0x0010000000000000UL;
+			max_phys_bits = 51;
+			break;
 		}
 	}
 

From 17b334a876fe121abbd6634ace83ca58deea4bc5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 4 Aug 2017 14:12:29 +0200
Subject: [PATCH 144/281] mlxsw: spectrum_switchdev: Don't warn about valid
 situations

Some operations in the bridge driver such as MDB deletion are preformed
in an atomic context and thus deferred to a process context by the
switchdev infrastructure.

Therefore, by the time the operation is performed by the underlying
device driver it's possible the bridge port context is already gone.
This is especially true for removal flows, but theoretically can also be
invoked during addition.

Remove the warnings in such situations and return normally.

Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN")
Fixes: 3922285d96e7 ("net: bridge: Add support for offloading port attributes")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/spectrum_switchdev.c       | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 656b2d3f1bee..73d54df89578 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -626,8 +626,8 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
 						orig_dev);
-	if (WARN_ON(!bridge_port))
-		return -EINVAL;
+	if (!bridge_port)
+		return 0;
 
 	err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
 						   MLXSW_SP_FLOOD_TYPE_UC,
@@ -711,8 +711,8 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
 						orig_dev);
-	if (WARN_ON(!bridge_port))
-		return -EINVAL;
+	if (!bridge_port)
+		return 0;
 
 	if (!bridge_port->bridge_device->multicast_enabled)
 		return 0;
@@ -1283,15 +1283,15 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
 		return 0;
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
-	if (WARN_ON(!bridge_port))
-		return -EINVAL;
+	if (!bridge_port)
+		return 0;
 
 	bridge_device = bridge_port->bridge_device;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port,
 							       bridge_device,
 							       mdb->vid);
-	if (WARN_ON(!mlxsw_sp_port_vlan))
-		return -EINVAL;
+	if (!mlxsw_sp_port_vlan)
+		return 0;
 
 	fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
 
@@ -1407,15 +1407,15 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	int err = 0;
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
-	if (WARN_ON(!bridge_port))
-		return -EINVAL;
+	if (!bridge_port)
+		return 0;
 
 	bridge_device = bridge_port->bridge_device;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port,
 							       bridge_device,
 							       mdb->vid);
-	if (WARN_ON(!mlxsw_sp_port_vlan))
-		return -EINVAL;
+	if (!mlxsw_sp_port_vlan)
+		return 0;
 
 	fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
 

From 852cfeed0ebe93a1c1e0c153c04a57221f72bd7e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 4 Aug 2017 14:12:30 +0200
Subject: [PATCH 145/281] mlxsw: spectrum_switchdev: Release multicast groups
 during fini

Each multicast group (MID) stores a bitmap of ports to which a packet
should be forwarded to in case an MDB entry associated with the MID is
hit.

Since the initial introduction of IGMP snooping in commit 3a49b4fde2a1
("mlxsw: Adding layer 2 multicast support") the driver didn't correctly
free these multicast groups upon ungraceful situations such as the
removal of the upper bridge device or module removal.

The correct way to fix this is to associate each MID with the bridge
ports member in it and then drop the reference in case the bridge port
is destroyed, but this will result in a lot more code and will be fixed
in net-next.

For now, upon module removal, traverse the MID list and release each
one.

Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c    | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 73d54df89578..5eb1606765c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1974,6 +1974,17 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 
 }
 
+static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_mid *mid, *tmp;
+
+	list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) {
+		list_del(&mid->list);
+		clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+		kfree(mid);
+	}
+}
+
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge;
@@ -1996,7 +2007,7 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	mlxsw_sp_fdb_fini(mlxsw_sp);
-	WARN_ON(!list_empty(&mlxsw_sp->bridge->mids_list));
+	mlxsw_sp_mids_fini(mlxsw_sp);
 	WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list));
 	kfree(mlxsw_sp->bridge);
 }

From b0a0c2566f28e71e5e32121992ac8060cec75510 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 4 Aug 2017 14:20:54 +0200
Subject: [PATCH 146/281] bpf, s390: fix jit branch offset related to ldimm64

While testing some other work that required JIT modifications, I
run into test_bpf causing a hang when JIT enabled on s390. The
problematic test case was the one from ddc665a4bb4b (bpf, arm64:
fix jit branch offset related to ldimm64), and turns out that we
do have a similar issue on s390 as well. In bpf_jit_prog() we
update next instruction address after returning from bpf_jit_insn()
with an insn_count. bpf_jit_insn() returns either -1 in case of
error (e.g. unsupported insn), 1 or 2. The latter is only the
case for ldimm64 due to spanning 2 insns, however, next address
is only set to i + 1 not taking actual insn_count into account,
thus fix is to use insn_count instead of 1. bpf_jit_enable in
mode 2 provides also disasm on s390:

Before fix:

  000003ff800349b6: a7f40003   brc     15,3ff800349bc                 ; target
  000003ff800349ba: 0000               unknown
  000003ff800349bc: e3b0f0700024       stg     %r11,112(%r15)
  000003ff800349c2: e3e0f0880024       stg     %r14,136(%r15)
  000003ff800349c8: 0db0               basr    %r11,%r0
  000003ff800349ca: c0ef00000000       llilf   %r14,0
  000003ff800349d0: e320b0360004       lg      %r2,54(%r11)
  000003ff800349d6: e330b03e0004       lg      %r3,62(%r11)
  000003ff800349dc: ec23ffeda065       clgrj   %r2,%r3,10,3ff800349b6 ; jmp
  000003ff800349e2: e3e0b0460004       lg      %r14,70(%r11)
  000003ff800349e8: e3e0b04e0004       lg      %r14,78(%r11)
  000003ff800349ee: b904002e   lgr     %r2,%r14
  000003ff800349f2: e3b0f0700004       lg      %r11,112(%r15)
  000003ff800349f8: e3e0f0880004       lg      %r14,136(%r15)
  000003ff800349fe: 07fe               bcr     15,%r14

After fix:

  000003ff80ef3db4: a7f40003   brc     15,3ff80ef3dba
  000003ff80ef3db8: 0000               unknown
  000003ff80ef3dba: e3b0f0700024       stg     %r11,112(%r15)
  000003ff80ef3dc0: e3e0f0880024       stg     %r14,136(%r15)
  000003ff80ef3dc6: 0db0               basr    %r11,%r0
  000003ff80ef3dc8: c0ef00000000       llilf   %r14,0
  000003ff80ef3dce: e320b0360004       lg      %r2,54(%r11)
  000003ff80ef3dd4: e330b03e0004       lg      %r3,62(%r11)
  000003ff80ef3dda: ec230006a065       clgrj   %r2,%r3,10,3ff80ef3de6 ; jmp
  000003ff80ef3de0: e3e0b0460004       lg      %r14,70(%r11)
  000003ff80ef3de6: e3e0b04e0004       lg      %r14,78(%r11)          ; target
  000003ff80ef3dec: b904002e   lgr     %r2,%r14
  000003ff80ef3df0: e3b0f0700004       lg      %r11,112(%r15)
  000003ff80ef3df6: e3e0f0880004       lg      %r14,136(%r15)
  000003ff80ef3dfc: 07fe               bcr     15,%r14

test_bpf.ko suite runs fine after the fix.

Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/net/bpf_jit_comp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 01c6fbc3e85b..1803797fc885 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1253,7 +1253,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
 		insn_count = bpf_jit_insn(jit, fp, i);
 		if (insn_count < 0)
 			return -1;
-		jit->addrs[i + 1] = jit->prg; /* Next instruction address */
+		/* Next instruction address */
+		jit->addrs[i + insn_count] = jit->prg;
 	}
 	bpf_jit_epilogue(jit);
 

From bad1926dd2f60bbb4af5223ace3a7b34a0219a66 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 4 Aug 2017 14:20:55 +0200
Subject: [PATCH 147/281] bpf, s390: fix build for libbpf and selftest suite

The BPF feature test as well as libbpf is missing the __NR_bpf
define for s390 and currently refuses to compile (selftest suite
depends on libbpf as well). Similar issue was fixed some time
ago via b0c47807d31d ("bpf: Add sparc support to tools and
samples."), just do the same and add definitions.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/build/feature/test-bpf.c | 2 ++
 tools/lib/bpf/bpf.c            | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index 7598361ef1f1..da2172ff9662 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -11,6 +11,8 @@
 #  define __NR_bpf 280
 # elif defined(__sparc__)
 #  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 256f571f2ab5..e5bbb090bf88 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -39,6 +39,8 @@
 #  define __NR_bpf 280
 # elif defined(__sparc__)
 #  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif

From b6bd53f9c4e825fca82fe1392157c78443c814ab Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Thu, 3 Aug 2017 17:10:12 -0700
Subject: [PATCH 148/281] MIPS: Add missing file for eBPF JIT.

Inexplicably, commit f381bf6d82f0 ("MIPS: Add support for eBPF JIT.")
lost a file somewhere on its path to Linus' tree.  Add back the
missing ebpf_jit.c so that we can build with CONFIG_BPF_JIT selected.

This version of ebpf_jit.c is identical to the original except for two
minor change need to resolve conflicts with changes merged from the
BPF branch:

A) Set prog->jited_len = image_size;
B) Use BPF_TAIL_CALL instead of BPF_CALL | BPF_X

Fixes: f381bf6d82f0 ("MIPS: Add support for eBPF JIT.")
Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/net/ebpf_jit.c | 1950 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 1950 insertions(+)
 create mode 100644 arch/mips/net/ebpf_jit.c

diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
new file mode 100644
index 000000000000..3f87b96da5c4
--- /dev/null
+++ b/arch/mips/net/ebpf_jit.c
@@ -0,0 +1,1950 @@
+/*
+ * Just-In-Time compiler for eBPF filters on MIPS
+ *
+ * Copyright (c) 2017 Cavium, Inc.
+ *
+ * Based on code from:
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-features.h>
+#include <asm/uasm.h>
+
+/* Registers used by JIT */
+#define MIPS_R_ZERO	0
+#define MIPS_R_AT	1
+#define MIPS_R_V0	2	/* BPF_R0 */
+#define MIPS_R_V1	3
+#define MIPS_R_A0	4	/* BPF_R1 */
+#define MIPS_R_A1	5	/* BPF_R2 */
+#define MIPS_R_A2	6	/* BPF_R3 */
+#define MIPS_R_A3	7	/* BPF_R4 */
+#define MIPS_R_A4	8	/* BPF_R5 */
+#define MIPS_R_T4	12	/* BPF_AX */
+#define MIPS_R_T5	13
+#define MIPS_R_T6	14
+#define MIPS_R_T7	15
+#define MIPS_R_S0	16	/* BPF_R6 */
+#define MIPS_R_S1	17	/* BPF_R7 */
+#define MIPS_R_S2	18	/* BPF_R8 */
+#define MIPS_R_S3	19	/* BPF_R9 */
+#define MIPS_R_S4	20	/* BPF_TCC */
+#define MIPS_R_S5	21
+#define MIPS_R_S6	22
+#define MIPS_R_S7	23
+#define MIPS_R_T8	24
+#define MIPS_R_T9	25
+#define MIPS_R_SP	29
+#define MIPS_R_RA	31
+
+/* eBPF flags */
+#define EBPF_SAVE_S0	BIT(0)
+#define EBPF_SAVE_S1	BIT(1)
+#define EBPF_SAVE_S2	BIT(2)
+#define EBPF_SAVE_S3	BIT(3)
+#define EBPF_SAVE_S4	BIT(4)
+#define EBPF_SAVE_RA	BIT(5)
+#define EBPF_SEEN_FP	BIT(6)
+#define EBPF_SEEN_TC	BIT(7)
+#define EBPF_TCC_IN_V1	BIT(8)
+
+/*
+ * For the mips64 ISA, we need to track the value range or type for
+ * each JIT register.  The BPF machine requires zero extended 32-bit
+ * values, but the mips64 ISA requires sign extended 32-bit values.
+ * At each point in the BPF program we track the state of every
+ * register so that we can zero extend or sign extend as the BPF
+ * semantics require.
+ */
+enum reg_val_type {
+	/* uninitialized */
+	REG_UNKNOWN,
+	/* not known to be 32-bit compatible. */
+	REG_64BIT,
+	/* 32-bit compatible, no truncation needed for 64-bit ops. */
+	REG_64BIT_32BIT,
+	/* 32-bit compatible, need truncation for 64-bit ops. */
+	REG_32BIT,
+	/* 32-bit zero extended. */
+	REG_32BIT_ZERO_EX,
+	/* 32-bit no sign/zero extension needed. */
+	REG_32BIT_POS
+};
+
+/*
+ * high bit of offsets indicates if long branch conversion done at
+ * this insn.
+ */
+#define OFFSETS_B_CONV	BIT(31)
+
+/**
+ * struct jit_ctx - JIT context
+ * @skf:		The sk_filter
+ * @stack_size:		eBPF stack size
+ * @tmp_offset:		eBPF $sp offset to 8-byte temporary memory
+ * @idx:		Instruction index
+ * @flags:		JIT flags
+ * @offsets:		Instruction offsets
+ * @target:		Memory location for the compiled filter
+ * @reg_val_types	Packed enum reg_val_type for each register.
+ */
+struct jit_ctx {
+	const struct bpf_prog *skf;
+	int stack_size;
+	int tmp_offset;
+	u32 idx;
+	u32 flags;
+	u32 *offsets;
+	u32 *target;
+	u64 *reg_val_types;
+	unsigned int long_b_conversion:1;
+	unsigned int gen_b_offsets:1;
+};
+
+static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
+{
+	*rvt &= ~(7ull << (reg * 3));
+	*rvt |= ((u64)type << (reg * 3));
+}
+
+static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
+					  int index, int reg)
+{
+	return (ctx->reg_val_types[index] >> (reg * 3)) & 7;
+}
+
+/* Simply emit the instruction if the JIT memory space has been allocated */
+#define emit_instr(ctx, func, ...)			\
+do {							\
+	if ((ctx)->target != NULL) {			\
+		u32 *p = &(ctx)->target[ctx->idx];	\
+		uasm_i_##func(&p, ##__VA_ARGS__);	\
+	}						\
+	(ctx)->idx++;					\
+} while (0)
+
+static unsigned int j_target(struct jit_ctx *ctx, int target_idx)
+{
+	unsigned long target_va, base_va;
+	unsigned int r;
+
+	if (!ctx->target)
+		return 0;
+
+	base_va = (unsigned long)ctx->target;
+	target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV);
+
+	if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful))
+		return (unsigned int)-1;
+	r = target_va & 0x0ffffffful;
+	return r;
+}
+
+/* Compute the immediate value for PC-relative branches. */
+static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
+{
+	if (!ctx->gen_b_offsets)
+		return 0;
+
+	/*
+	 * We want a pc-relative branch.  tgt is the instruction offset
+	 * we want to jump to.
+
+	 * Branch on MIPS:
+	 * I: target_offset <- sign_extend(offset)
+	 * I+1: PC += target_offset (delay slot)
+	 *
+	 * ctx->idx currently points to the branch instruction
+	 * but the offset is added to the delay slot so we need
+	 * to subtract 4.
+	 */
+	return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) -
+		(ctx->idx * 4) - 4;
+}
+
+int bpf_jit_enable __read_mostly;
+
+enum which_ebpf_reg {
+	src_reg,
+	src_reg_no_fp,
+	dst_reg,
+	dst_reg_fp_ok
+};
+
+/*
+ * For eBPF, the register mapping naturally falls out of the
+ * requirements of eBPF and the MIPS n64 ABI.  We don't maintain a
+ * separate frame pointer, so BPF_REG_10 relative accesses are
+ * adjusted to be $sp relative.
+ */
+int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn,
+		     enum which_ebpf_reg w)
+{
+	int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ?
+		insn->src_reg : insn->dst_reg;
+
+	switch (ebpf_reg) {
+	case BPF_REG_0:
+		return MIPS_R_V0;
+	case BPF_REG_1:
+		return MIPS_R_A0;
+	case BPF_REG_2:
+		return MIPS_R_A1;
+	case BPF_REG_3:
+		return MIPS_R_A2;
+	case BPF_REG_4:
+		return MIPS_R_A3;
+	case BPF_REG_5:
+		return MIPS_R_A4;
+	case BPF_REG_6:
+		ctx->flags |= EBPF_SAVE_S0;
+		return MIPS_R_S0;
+	case BPF_REG_7:
+		ctx->flags |= EBPF_SAVE_S1;
+		return MIPS_R_S1;
+	case BPF_REG_8:
+		ctx->flags |= EBPF_SAVE_S2;
+		return MIPS_R_S2;
+	case BPF_REG_9:
+		ctx->flags |= EBPF_SAVE_S3;
+		return MIPS_R_S3;
+	case BPF_REG_10:
+		if (w == dst_reg || w == src_reg_no_fp)
+			goto bad_reg;
+		ctx->flags |= EBPF_SEEN_FP;
+		/*
+		 * Needs special handling, return something that
+		 * cannot be clobbered just in case.
+		 */
+		return MIPS_R_ZERO;
+	case BPF_REG_AX:
+		return MIPS_R_T4;
+	default:
+bad_reg:
+		WARN(1, "Illegal bpf reg: %d\n", ebpf_reg);
+		return -EINVAL;
+	}
+}
+/*
+ * eBPF stack frame will be something like:
+ *
+ *  Entry $sp ------>   +--------------------------------+
+ *                      |   $ra  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s0  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s1  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s2  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s3  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s4  (optional)              |
+ *                      +--------------------------------+
+ *                      |   tmp-storage  (if $ra saved)  |
+ * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10
+ *                      |   BPF_REG_10 relative storage  |
+ *                      |    MAX_BPF_STACK (optional)    |
+ *                      |      .                         |
+ *                      |      .                         |
+ *                      |      .                         |
+ *     $sp -------->    +--------------------------------+
+ *
+ * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized
+ * area is not allocated.
+ */
+static int gen_int_prologue(struct jit_ctx *ctx)
+{
+	int stack_adjust = 0;
+	int store_offset;
+	int locals_size;
+
+	if (ctx->flags & EBPF_SAVE_RA)
+		/*
+		 * If RA we are doing a function call and may need
+		 * extra 8-byte tmp area.
+		 */
+		stack_adjust += 16;
+	if (ctx->flags & EBPF_SAVE_S0)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S1)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S2)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S3)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S4)
+		stack_adjust += 8;
+
+	BUILD_BUG_ON(MAX_BPF_STACK & 7);
+	locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
+
+	stack_adjust += locals_size;
+	ctx->tmp_offset = locals_size;
+
+	ctx->stack_size = stack_adjust;
+
+	/*
+	 * First instruction initializes the tail call count (TCC).
+	 * On tail call we skip this instruction, and the TCC is
+	 * passed in $v1 from the caller.
+	 */
+	emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
+	if (stack_adjust)
+		emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust);
+	else
+		return 0;
+
+	store_offset = stack_adjust - 8;
+
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+
+	if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1))
+		emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO);
+
+	return 0;
+}
+
+static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	int stack_adjust = ctx->stack_size;
+	int store_offset = stack_adjust - 8;
+	int r0 = MIPS_R_V0;
+
+	if (dest_reg == MIPS_R_RA &&
+	    get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX)
+		/* Don't let zero extended value escape. */
+		emit_instr(ctx, sll, r0, r0, 0);
+
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	emit_instr(ctx, jr, dest_reg);
+
+	if (stack_adjust)
+		emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust);
+	else
+		emit_instr(ctx, nop);
+
+	return 0;
+}
+
+static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
+			   struct jit_ctx *ctx)
+{
+	if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
+		emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm);
+	} else {
+		int lower = (s16)(insn->imm & 0xffff);
+		int upper = insn->imm - lower;
+
+		emit_instr(ctx, lui, reg, upper >> 16);
+		emit_instr(ctx, addiu, reg, reg, lower);
+	}
+
+}
+
+static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+			int idx)
+{
+	int upper_bound, lower_bound;
+	int dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+
+	if (dst < 0)
+		return dst;
+
+	switch (BPF_OP(insn->code)) {
+	case BPF_MOV:
+	case BPF_ADD:
+		upper_bound = S16_MAX;
+		lower_bound = S16_MIN;
+		break;
+	case BPF_SUB:
+		upper_bound = -(int)S16_MIN;
+		lower_bound = -(int)S16_MAX;
+		break;
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		upper_bound = 0xffff;
+		lower_bound = 0;
+		break;
+	case BPF_RSH:
+	case BPF_LSH:
+	case BPF_ARSH:
+		/* Shift amounts are truncated, no need for bounds */
+		upper_bound = S32_MAX;
+		lower_bound = S32_MIN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Immediate move clobbers the register, so no sign/zero
+	 * extension needed.
+	 */
+	if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT)
+		emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+	/* BPF_ALU | BPF_LSH doesn't need separate sign extension */
+	if (BPF_CLASS(insn->code) == BPF_ALU &&
+	    BPF_OP(insn->code) != BPF_LSH &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT)
+		emit_instr(ctx, sll, dst, dst, 0);
+
+	if (insn->imm >= lower_bound && insn->imm <= upper_bound) {
+		/* single insn immediate case */
+		switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+		case BPF_ALU64 | BPF_MOV:
+			emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_AND:
+		case BPF_ALU | BPF_AND:
+			emit_instr(ctx, andi, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_OR:
+		case BPF_ALU | BPF_OR:
+			emit_instr(ctx, ori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_XOR:
+		case BPF_ALU | BPF_XOR:
+			emit_instr(ctx, xori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_ADD:
+			emit_instr(ctx, daddiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_SUB:
+			emit_instr(ctx, daddiu, dst, dst, -insn->imm);
+			break;
+		case BPF_ALU64 | BPF_RSH:
+			emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_RSH:
+			emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_LSH:
+			emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_LSH:
+			emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_ARSH:
+			emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_ARSH:
+			emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU | BPF_MOV:
+			emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU | BPF_ADD:
+			emit_instr(ctx, addiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU | BPF_SUB:
+			emit_instr(ctx, addiu, dst, dst, -insn->imm);
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		/* multi insn immediate case */
+		if (BPF_OP(insn->code) == BPF_MOV) {
+			gen_imm_to_reg(insn, dst, ctx);
+		} else {
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+			case BPF_ALU64 | BPF_AND:
+			case BPF_ALU | BPF_AND:
+				emit_instr(ctx, and, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_OR:
+			case BPF_ALU | BPF_OR:
+				emit_instr(ctx, or, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_XOR:
+			case BPF_ALU | BPF_XOR:
+				emit_instr(ctx, xor, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_ADD:
+				emit_instr(ctx, daddu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_SUB:
+				emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_ADD:
+				emit_instr(ctx, addu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_SUB:
+				emit_instr(ctx, subu, dst, dst, MIPS_R_AT);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void * __must_check
+ool_skb_header_pointer(const struct sk_buff *skb, int offset,
+		       int len, void *buffer)
+{
+	return skb_header_pointer(skb, offset, len, buffer);
+}
+
+static int size_to_len(const struct bpf_insn *insn)
+{
+	switch (BPF_SIZE(insn->code)) {
+	case BPF_B:
+		return 1;
+	case BPF_H:
+		return 2;
+	case BPF_W:
+		return 4;
+	case BPF_DW:
+		return 8;
+	}
+	return 0;
+}
+
+static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
+{
+	if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
+		emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value);
+	} else if (value >= 0xffffffff80000000ull ||
+		   (value < 0x80000000 && value > 0xffff)) {
+		emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16));
+		emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff));
+	} else {
+		int i;
+		bool seen_part = false;
+		int needed_shift = 0;
+
+		for (i = 0; i < 4; i++) {
+			u64 part = (value >> (16 * (3 - i))) & 0xffff;
+
+			if (seen_part && needed_shift > 0 && (part || i == 3)) {
+				emit_instr(ctx, dsll_safe, dst, dst, needed_shift);
+				needed_shift = 0;
+			}
+			if (part) {
+				if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) {
+					emit_instr(ctx, lui, dst, (s32)(s16)part);
+					needed_shift = -16;
+				} else {
+					emit_instr(ctx, ori, dst,
+						   seen_part ? dst : MIPS_R_ZERO,
+						   (unsigned int)part);
+				}
+				seen_part = true;
+			}
+			if (seen_part)
+				needed_shift += 16;
+		}
+	}
+}
+
+static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
+{
+	int off, b_off;
+
+	ctx->flags |= EBPF_SEEN_TC;
+	/*
+	 * if (index >= array->map.max_entries)
+	 *     goto out;
+	 */
+	off = offsetof(struct bpf_array, map.max_entries);
+	emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1);
+	emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off);
+	/*
+	 * if (--TCC < 0)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	emit_instr(ctx, daddiu, MIPS_R_T5,
+		   (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, bltz, MIPS_R_T5, b_off);
+	/*
+	 * prog = array->ptrs[index];
+	 * if (prog == NULL)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3);
+	emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1);
+	off = offsetof(struct bpf_array, ptrs);
+	emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off);
+	/* Delay slot */
+	emit_instr(ctx, nop);
+
+	/* goto *(prog->bpf_func + 4); */
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT);
+	/* All systems are go... propagate TCC */
+	emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO);
+	/* Skip first instruction (TCC initialization) */
+	emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4);
+	return build_int_epilogue(ctx, MIPS_R_T9);
+}
+
+static bool use_bbit_insns(void)
+{
+	switch (current_cpu_type()) {
+	case CPU_CAVIUM_OCTEON:
+	case CPU_CAVIUM_OCTEON_PLUS:
+	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_bad_offset(int b_off)
+{
+	return b_off > 0x1ffff || b_off < -0x20000;
+}
+
+/* Returns the number of insn slots consumed. */
+static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+			  int this_idx, int exit_idx)
+{
+	int src, dst, r, td, ts, mem_off, b_off;
+	bool need_swap, did_move, cmp_eq;
+	unsigned int target;
+	u64 t64;
+	s64 t64s;
+
+	switch (insn->code) {
+	case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */
+		r = gen_imm_insn(insn, ctx, this_idx);
+		if (r < 0)
+			return r;
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		if (insn->imm == 1) /* Mult by 1 is a nop */
+			break;
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		emit_instr(ctx, dmultu, MIPS_R_AT, dst);
+		emit_instr(ctx, mflo, dst);
+		break;
+	case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		if (insn->imm == 1) /* Mult by 1 is a nop */
+			break;
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		emit_instr(ctx, multu, dst, MIPS_R_AT);
+		emit_instr(ctx, mflo, dst);
+		break;
+	case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
+	case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		if (insn->imm == 0) { /* Div by zero */
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
+		}
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		if (insn->imm == 1) {
+			/* div by 1 is a nop, mod by 1 is zero */
+			if (BPF_OP(insn->code) == BPF_MOD)
+				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+			break;
+		}
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		emit_instr(ctx, divu, dst, MIPS_R_AT);
+		if (BPF_OP(insn->code) == BPF_DIV)
+			emit_instr(ctx, mflo, dst);
+		else
+			emit_instr(ctx, mfhi, dst);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		if (insn->imm == 0) { /* Div by zero */
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
+		}
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+
+		if (insn->imm == 1) {
+			/* div by 1 is a nop, mod by 1 is zero */
+			if (BPF_OP(insn->code) == BPF_MOD)
+				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+			break;
+		}
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		emit_instr(ctx, ddivu, dst, MIPS_R_AT);
+		if (BPF_OP(insn->code) == BPF_DIV)
+			emit_instr(ctx, mflo, dst);
+		else
+			emit_instr(ctx, mfhi, dst);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */
+		src = ebpf_to_mips_reg(ctx, insn, src_reg);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		did_move = false;
+		if (insn->src_reg == BPF_REG_10) {
+			if (BPF_OP(insn->code) == BPF_MOV) {
+				emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK);
+				did_move = true;
+			} else {
+				emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK);
+				src = MIPS_R_AT;
+			}
+		} else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+			int tmp_reg = MIPS_R_AT;
+
+			if (BPF_OP(insn->code) == BPF_MOV) {
+				tmp_reg = dst;
+				did_move = true;
+			}
+			emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO);
+			emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
+			src = MIPS_R_AT;
+		}
+		switch (BPF_OP(insn->code)) {
+		case BPF_MOV:
+			if (!did_move)
+				emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
+			break;
+		case BPF_ADD:
+			emit_instr(ctx, daddu, dst, dst, src);
+			break;
+		case BPF_SUB:
+			emit_instr(ctx, dsubu, dst, dst, src);
+			break;
+		case BPF_XOR:
+			emit_instr(ctx, xor, dst, dst, src);
+			break;
+		case BPF_OR:
+			emit_instr(ctx, or, dst, dst, src);
+			break;
+		case BPF_AND:
+			emit_instr(ctx, and, dst, dst, src);
+			break;
+		case BPF_MUL:
+			emit_instr(ctx, dmultu, dst, src);
+			emit_instr(ctx, mflo, dst);
+			break;
+		case BPF_DIV:
+		case BPF_MOD:
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
+			emit_instr(ctx, ddivu, dst, src);
+			if (BPF_OP(insn->code) == BPF_DIV)
+				emit_instr(ctx, mflo, dst);
+			else
+				emit_instr(ctx, mfhi, dst);
+			break;
+		case BPF_LSH:
+			emit_instr(ctx, dsllv, dst, dst, src);
+			break;
+		case BPF_RSH:
+			emit_instr(ctx, dsrlv, dst, dst, src);
+			break;
+		case BPF_ARSH:
+			emit_instr(ctx, dsrav, dst, dst, src);
+			break;
+		default:
+			pr_err("ALU64_REG NOT HANDLED\n");
+			return -EINVAL;
+		}
+		break;
+	case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */
+		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		did_move = false;
+		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+		if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) {
+			int tmp_reg = MIPS_R_AT;
+
+			if (BPF_OP(insn->code) == BPF_MOV) {
+				tmp_reg = dst;
+				did_move = true;
+			}
+			/* sign extend */
+			emit_instr(ctx, sll, tmp_reg, src, 0);
+			src = MIPS_R_AT;
+		}
+		switch (BPF_OP(insn->code)) {
+		case BPF_MOV:
+			if (!did_move)
+				emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
+			break;
+		case BPF_ADD:
+			emit_instr(ctx, addu, dst, dst, src);
+			break;
+		case BPF_SUB:
+			emit_instr(ctx, subu, dst, dst, src);
+			break;
+		case BPF_XOR:
+			emit_instr(ctx, xor, dst, dst, src);
+			break;
+		case BPF_OR:
+			emit_instr(ctx, or, dst, dst, src);
+			break;
+		case BPF_AND:
+			emit_instr(ctx, and, dst, dst, src);
+			break;
+		case BPF_MUL:
+			emit_instr(ctx, mul, dst, dst, src);
+			break;
+		case BPF_DIV:
+		case BPF_MOD:
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
+			emit_instr(ctx, divu, dst, src);
+			if (BPF_OP(insn->code) == BPF_DIV)
+				emit_instr(ctx, mflo, dst);
+			else
+				emit_instr(ctx, mfhi, dst);
+			break;
+		case BPF_LSH:
+			emit_instr(ctx, sllv, dst, dst, src);
+			break;
+		case BPF_RSH:
+			emit_instr(ctx, srlv, dst, dst, src);
+			break;
+		default:
+			pr_err("ALU_REG NOT HANDLED\n");
+			return -EINVAL;
+		}
+		break;
+	case BPF_JMP | BPF_EXIT:
+		if (this_idx + 1 < exit_idx) {
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, nop);
+		}
+		break;
+	case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
+		cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+		if (dst < 0)
+			return dst;
+		if (insn->imm == 0) {
+			src = MIPS_R_ZERO;
+		} else {
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			src = MIPS_R_AT;
+		}
+		goto jeq_common;
+	case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X:
+		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+		if (td == REG_32BIT && ts != REG_32BIT) {
+			emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+			src = MIPS_R_AT;
+		} else if (ts == REG_32BIT && td != REG_32BIT) {
+			emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
+			dst = MIPS_R_AT;
+		}
+		if (BPF_OP(insn->code) == BPF_JSET) {
+			emit_instr(ctx, and, MIPS_R_AT, dst, src);
+			cmp_eq = false;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (BPF_OP(insn->code) == BPF_JSGT) {
+			emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_instr(ctx, blez, MIPS_R_AT, b_off);
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+			emit_instr(ctx, nop);
+			break;
+		} else if (BPF_OP(insn->code) == BPF_JSGE) {
+			emit_instr(ctx, slt, MIPS_R_AT, dst, src);
+			cmp_eq = true;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (BPF_OP(insn->code) == BPF_JGT) {
+			/* dst or src could be AT */
+			emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
+			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+			/* SP known to be non-zero, movz becomes boolean not */
+			emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8);
+			emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8);
+			emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
+			cmp_eq = true;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (BPF_OP(insn->code) == BPF_JGE) {
+			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+			cmp_eq = true;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else { /* JNE/JEQ case */
+			cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+		}
+jeq_common:
+		/*
+		 * If the next insn is EXIT and we are jumping arround
+		 * only it, invert the sense of the compare and
+		 * conditionally jump to the exit.  Poor man's branch
+		 * chaining.
+		 */
+		if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off)) {
+				target = j_target(ctx, exit_idx);
+				if (target == (unsigned int)-1)
+					return -E2BIG;
+				cmp_eq = !cmp_eq;
+				b_off = 4 * 3;
+				if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+					ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+					ctx->long_b_conversion = 1;
+				}
+			}
+
+			if (cmp_eq)
+				emit_instr(ctx, bne, dst, src, b_off);
+			else
+				emit_instr(ctx, beq, dst, src, b_off);
+			emit_instr(ctx, nop);
+			if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+				emit_instr(ctx, j, target);
+				emit_instr(ctx, nop);
+			}
+			return 2; /* We consumed the exit. */
+		}
+		b_off = b_imm(this_idx + insn->off + 1, ctx);
+		if (is_bad_offset(b_off)) {
+			target = j_target(ctx, this_idx + insn->off + 1);
+			if (target == (unsigned int)-1)
+				return -E2BIG;
+			cmp_eq = !cmp_eq;
+			b_off = 4 * 3;
+			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+				ctx->long_b_conversion = 1;
+			}
+		}
+
+		if (cmp_eq)
+			emit_instr(ctx, beq, dst, src, b_off);
+		else
+			emit_instr(ctx, bne, dst, src, b_off);
+		emit_instr(ctx, nop);
+		if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+			emit_instr(ctx, j, target);
+			emit_instr(ctx, nop);
+		}
+		break;
+	case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
+		cmp_eq = (BPF_OP(insn->code) == BPF_JSGE);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+		if (dst < 0)
+			return dst;
+
+		if (insn->imm == 0) {
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				if (cmp_eq)
+					emit_instr(ctx, bltz, dst, b_off);
+				else
+					emit_instr(ctx, blez, dst, b_off);
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			if (cmp_eq)
+				emit_instr(ctx, bgez, dst, b_off);
+			else
+				emit_instr(ctx, bgtz, dst, b_off);
+			emit_instr(ctx, nop);
+			break;
+		}
+		/*
+		 * only "LT" compare available, so we must use imm + 1
+		 * to generate "GT"
+		 */
+		t64s = insn->imm + (cmp_eq ? 0 : 1);
+		if (t64s >= S16_MIN && t64s <= S16_MAX) {
+			emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
+			src = MIPS_R_AT;
+			dst = MIPS_R_ZERO;
+			cmp_eq = true;
+			goto jeq_common;
+		}
+		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+		emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		cmp_eq = true;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+		cmp_eq = (BPF_OP(insn->code) == BPF_JGE);
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+		if (dst < 0)
+			return dst;
+		/*
+		 * only "LT" compare available, so we must use imm + 1
+		 * to generate "GT"
+		 */
+		t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1);
+		if (t64s >= 0 && t64s <= S16_MAX) {
+			emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s);
+			src = MIPS_R_AT;
+			dst = MIPS_R_ZERO;
+			cmp_eq = true;
+			goto jeq_common;
+		}
+		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+		emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		cmp_eq = true;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+		if (dst < 0)
+			return dst;
+
+		if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) {
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off);
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off);
+			emit_instr(ctx, nop);
+			break;
+		}
+		t64 = (u32)insn->imm;
+		emit_const_to_reg(ctx, MIPS_R_AT, t64);
+		emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		cmp_eq = false;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JA:
+		/*
+		 * Prefer relative branch for easier debugging, but
+		 * fall back if needed.
+		 */
+		b_off = b_imm(this_idx + insn->off + 1, ctx);
+		if (is_bad_offset(b_off)) {
+			target = j_target(ctx, this_idx + insn->off + 1);
+			if (target == (unsigned int)-1)
+				return -E2BIG;
+			emit_instr(ctx, j, target);
+		} else {
+			emit_instr(ctx, b, b_off);
+		}
+		emit_instr(ctx, nop);
+		break;
+	case BPF_LD | BPF_DW | BPF_IMM:
+		if (insn->src_reg != 0)
+			return -EINVAL;
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32);
+		emit_const_to_reg(ctx, dst, t64);
+		return 2; /* Double slot insn */
+
+	case BPF_JMP | BPF_CALL:
+		ctx->flags |= EBPF_SAVE_RA;
+		t64s = (s64)insn->imm + (s64)__bpf_call_base;
+		emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s);
+		emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
+		/* delay slot */
+		emit_instr(ctx, nop);
+		break;
+
+	case BPF_JMP | BPF_TAIL_CALL:
+		if (emit_bpf_tail_call(ctx, this_idx))
+			return -EINVAL;
+		break;
+
+	case BPF_LD | BPF_B | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_DW | BPF_ABS:
+		ctx->flags |= EBPF_SAVE_RA;
+
+		gen_imm_to_reg(insn, MIPS_R_A1, ctx);
+		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
+
+		if (insn->imm < 0) {
+			emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
+		} else {
+			emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
+			emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
+		}
+		goto ld_skb_common;
+
+	case BPF_LD | BPF_B | BPF_IND:
+	case BPF_LD | BPF_H | BPF_IND:
+	case BPF_LD | BPF_W | BPF_IND:
+	case BPF_LD | BPF_DW | BPF_IND:
+		ctx->flags |= EBPF_SAVE_RA;
+		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+		if (src < 0)
+			return src;
+		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+		if (ts == REG_32BIT_ZERO_EX) {
+			/* sign extend */
+			emit_instr(ctx, sll, MIPS_R_A1, src, 0);
+			src = MIPS_R_A1;
+		}
+		if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
+			emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
+		} else {
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
+		}
+		/* truncate to 32-bit int */
+		emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
+		emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
+		emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
+
+		emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
+		emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
+		emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
+		emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
+
+ld_skb_common:
+		emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
+		/* delay slot move */
+		emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
+
+		/* Check the error value */
+		b_off = b_imm(exit_idx, ctx);
+		if (is_bad_offset(b_off)) {
+			target = j_target(ctx, exit_idx);
+			if (target == (unsigned int)-1)
+				return -E2BIG;
+
+			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+				ctx->long_b_conversion = 1;
+			}
+			emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
+			emit_instr(ctx, nop);
+			emit_instr(ctx, j, target);
+			emit_instr(ctx, nop);
+		} else {
+			emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
+			emit_instr(ctx, nop);
+		}
+
+#ifdef __BIG_ENDIAN
+		need_swap = false;
+#else
+		need_swap = true;
+#endif
+		dst = MIPS_R_V0;
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
+			break;
+		case BPF_H:
+			emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
+			if (need_swap)
+				emit_instr(ctx, wsbh, dst, dst);
+			break;
+		case BPF_W:
+			emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
+			if (need_swap) {
+				emit_instr(ctx, wsbh, dst, dst);
+				emit_instr(ctx, rotr, dst, dst, 16);
+			}
+			break;
+		case BPF_DW:
+			emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
+			if (need_swap) {
+				emit_instr(ctx, dsbh, dst, dst);
+				emit_instr(ctx, dshd, dst, dst);
+			}
+			break;
+		}
+
+		break;
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (insn->imm == 64 && td == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+
+		if (insn->imm != 64 &&
+		    (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+
+#ifdef __BIG_ENDIAN
+		need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE);
+#else
+		need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE);
+#endif
+		if (insn->imm == 16) {
+			if (need_swap)
+				emit_instr(ctx, wsbh, dst, dst);
+			emit_instr(ctx, andi, dst, dst, 0xffff);
+		} else if (insn->imm == 32) {
+			if (need_swap) {
+				emit_instr(ctx, wsbh, dst, dst);
+				emit_instr(ctx, rotr, dst, dst, 16);
+			}
+		} else { /* 64-bit*/
+			if (need_swap) {
+				emit_instr(ctx, dsbh, dst, dst);
+				emit_instr(ctx, dshd, dst, dst);
+			}
+		}
+		break;
+
+	case BPF_ST | BPF_B | BPF_MEM:
+	case BPF_ST | BPF_H | BPF_MEM:
+	case BPF_ST | BPF_W | BPF_MEM:
+	case BPF_ST | BPF_DW | BPF_MEM:
+		if (insn->dst_reg == BPF_REG_10) {
+			ctx->flags |= EBPF_SEEN_FP;
+			dst = MIPS_R_SP;
+			mem_off = insn->off + MAX_BPF_STACK;
+		} else {
+			dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+			if (dst < 0)
+				return dst;
+			mem_off = insn->off;
+		}
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_H:
+			emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_W:
+			emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_DW:
+			emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst);
+			break;
+		}
+		break;
+
+	case BPF_LDX | BPF_B | BPF_MEM:
+	case BPF_LDX | BPF_H | BPF_MEM:
+	case BPF_LDX | BPF_W | BPF_MEM:
+	case BPF_LDX | BPF_DW | BPF_MEM:
+		if (insn->src_reg == BPF_REG_10) {
+			ctx->flags |= EBPF_SEEN_FP;
+			src = MIPS_R_SP;
+			mem_off = insn->off + MAX_BPF_STACK;
+		} else {
+			src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+			if (src < 0)
+				return src;
+			mem_off = insn->off;
+		}
+		dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+		if (dst < 0)
+			return dst;
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			emit_instr(ctx, lbu, dst, mem_off, src);
+			break;
+		case BPF_H:
+			emit_instr(ctx, lhu, dst, mem_off, src);
+			break;
+		case BPF_W:
+			emit_instr(ctx, lw, dst, mem_off, src);
+			break;
+		case BPF_DW:
+			emit_instr(ctx, ld, dst, mem_off, src);
+			break;
+		}
+		break;
+
+	case BPF_STX | BPF_B | BPF_MEM:
+	case BPF_STX | BPF_H | BPF_MEM:
+	case BPF_STX | BPF_W | BPF_MEM:
+	case BPF_STX | BPF_DW | BPF_MEM:
+	case BPF_STX | BPF_W | BPF_XADD:
+	case BPF_STX | BPF_DW | BPF_XADD:
+		if (insn->dst_reg == BPF_REG_10) {
+			ctx->flags |= EBPF_SEEN_FP;
+			dst = MIPS_R_SP;
+			mem_off = insn->off + MAX_BPF_STACK;
+		} else {
+			dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+			if (dst < 0)
+				return dst;
+			mem_off = insn->off;
+		}
+		src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+		if (src < 0)
+			return dst;
+		if (BPF_MODE(insn->code) == BPF_XADD) {
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_W:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src);
+				emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst);
+				/*
+				 * On failure back up to LL (-4
+				 * instructions of 4 bytes each
+				 */
+				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+				emit_instr(ctx, nop);
+				break;
+			case BPF_DW:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src);
+				emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+				emit_instr(ctx, nop);
+				break;
+			}
+		} else { /* BPF_MEM */
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_B:
+				emit_instr(ctx, sb, src, mem_off, dst);
+				break;
+			case BPF_H:
+				emit_instr(ctx, sh, src, mem_off, dst);
+				break;
+			case BPF_W:
+				emit_instr(ctx, sw, src, mem_off, dst);
+				break;
+			case BPF_DW:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, sd, src, mem_off, dst);
+				break;
+			}
+		}
+		break;
+
+	default:
+		pr_err("NOT HANDLED %d - (%02x)\n",
+		       this_idx, (unsigned int)insn->code);
+		return -EINVAL;
+	}
+	return 1;
+}
+
+#define RVT_VISITED_MASK 0xc000000000000000ull
+#define RVT_FALL_THROUGH 0x4000000000000000ull
+#define RVT_BRANCH_TAKEN 0x8000000000000000ull
+#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN)
+
+static int build_int_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	const struct bpf_insn *insn;
+	int i, r;
+
+	for (i = 0; i < prog->len; ) {
+		insn = prog->insnsi + i;
+		if ((ctx->reg_val_types[i] & RVT_VISITED_MASK) == 0) {
+			/* dead instruction, don't emit it. */
+			i++;
+			continue;
+		}
+
+		if (ctx->target == NULL)
+			ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4);
+
+		r = build_one_insn(insn, ctx, i, prog->len);
+		if (r < 0)
+			return r;
+		i += r;
+	}
+	/* epilogue offset */
+	if (ctx->target == NULL)
+		ctx->offsets[i] = ctx->idx * 4;
+
+	/*
+	 * All exits have an offset of the epilogue, some offsets may
+	 * not have been set due to banch-around threading, so set
+	 * them now.
+	 */
+	if (ctx->target == NULL)
+		for (i = 0; i < prog->len; i++) {
+			insn = prog->insnsi + i;
+			if (insn->code == (BPF_JMP | BPF_EXIT))
+				ctx->offsets[i] = ctx->idx * 4;
+		}
+	return 0;
+}
+
+/* return the last idx processed, or negative for error */
+static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
+				   int start_idx, bool follow_taken)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	const struct bpf_insn *insn;
+	u64 exit_rvt = initial_rvt;
+	u64 *rvt = ctx->reg_val_types;
+	int idx;
+	int reg;
+
+	for (idx = start_idx; idx < prog->len; idx++) {
+		rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt;
+		insn = prog->insnsi + idx;
+		switch (BPF_CLASS(insn->code)) {
+		case BPF_ALU:
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_DIV:
+			case BPF_OR:
+			case BPF_AND:
+			case BPF_LSH:
+			case BPF_RSH:
+			case BPF_NEG:
+			case BPF_MOD:
+			case BPF_XOR:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			case BPF_MOV:
+				if (BPF_SRC(insn->code)) {
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				} else {
+					/* IMM to REG move*/
+					if (insn->imm >= 0)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				}
+				break;
+			case BPF_END:
+				if (insn->imm == 64)
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				else if (insn->imm == 32)
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				else /* insn->imm == 16 */
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_ALU64:
+			switch (BPF_OP(insn->code)) {
+			case BPF_MOV:
+				if (BPF_SRC(insn->code)) {
+					/* REG to REG move*/
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				} else {
+					/* IMM to REG move*/
+					if (insn->imm >= 0)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+				}
+				break;
+			default:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_LD:
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_DW:
+				if (BPF_MODE(insn->code) == BPF_IMM) {
+					s64 val;
+
+					val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32));
+					if (val > 0 && val <= S32_MAX)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else if (val >= S32_MIN && val <= S32_MAX)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+					rvt[idx] |= RVT_DONE;
+					idx++;
+				} else {
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				}
+				break;
+			case BPF_B:
+			case BPF_H:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			case BPF_W:
+				if (BPF_MODE(insn->code) == BPF_IMM)
+					set_reg_val_type(&exit_rvt, insn->dst_reg,
+							 insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT);
+				else
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_LDX:
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_DW:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				break;
+			case BPF_B:
+			case BPF_H:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			case BPF_W:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_JMP:
+			switch (BPF_OP(insn->code)) {
+			case BPF_EXIT:
+				rvt[idx] = RVT_DONE | exit_rvt;
+				rvt[prog->len] = exit_rvt;
+				return idx;
+			case BPF_JA:
+				rvt[idx] |= RVT_DONE;
+				idx += insn->off;
+				break;
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+			case BPF_JNE:
+			case BPF_JSGT:
+			case BPF_JSGE:
+				if (follow_taken) {
+					rvt[idx] |= RVT_BRANCH_TAKEN;
+					idx += insn->off;
+					follow_taken = false;
+				} else {
+					rvt[idx] |= RVT_FALL_THROUGH;
+				}
+				break;
+			case BPF_CALL:
+				set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT);
+				/* Upon call return, argument registers are clobbered. */
+				for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++)
+					set_reg_val_type(&exit_rvt, reg, REG_64BIT);
+
+				rvt[idx] |= RVT_DONE;
+				break;
+			default:
+				WARN(1, "Unhandled BPF_JMP case.\n");
+				rvt[idx] |= RVT_DONE;
+				break;
+			}
+			break;
+		default:
+			rvt[idx] |= RVT_DONE;
+			break;
+		}
+	}
+	return idx;
+}
+
+/*
+ * Track the value range (i.e. 32-bit vs. 64-bit) of each register at
+ * each eBPF insn.  This allows unneeded sign and zero extension
+ * operations to be omitted.
+ *
+ * Doesn't handle yet confluence of control paths with conflicting
+ * ranges, but it is good enough for most sane code.
+ */
+static int reg_val_propagate(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	u64 exit_rvt;
+	int reg;
+	int i;
+
+	/*
+	 * 11 registers * 3 bits/reg leaves top bits free for other
+	 * uses.  Bit-62..63 used to see if we have visited an insn.
+	 */
+	exit_rvt = 0;
+
+	/* Upon entry, argument registers are 64-bit. */
+	for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++)
+		set_reg_val_type(&exit_rvt, reg, REG_64BIT);
+
+	/*
+	 * First follow all conditional branches on the fall-through
+	 * edge of control flow..
+	 */
+	reg_val_propagate_range(ctx, exit_rvt, 0, false);
+restart_search:
+	/*
+	 * Then repeatedly find the first conditional branch where
+	 * both edges of control flow have not been taken, and follow
+	 * the branch taken edge.  We will end up restarting the
+	 * search once per conditional branch insn.
+	 */
+	for (i = 0; i < prog->len; i++) {
+		u64 rvt = ctx->reg_val_types[i];
+
+		if ((rvt & RVT_VISITED_MASK) == RVT_DONE ||
+		    (rvt & RVT_VISITED_MASK) == 0)
+			continue;
+		if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) {
+			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true);
+		} else { /* RVT_BRANCH_TAKEN */
+			WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n");
+			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false);
+		}
+		goto restart_search;
+	}
+	/*
+	 * Eventually all conditional branches have been followed on
+	 * both branches and we are done.  Any insn that has not been
+	 * visited at this point is dead.
+	 */
+
+	return 0;
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *p;
+
+	/* We are guaranteed to have aligned memory. */
+	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
+		uasm_i_break(&p, BRK_BUG); /* Increments p */
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	struct bpf_prog *orig_prog = prog;
+	bool tmp_blinded = false;
+	struct bpf_prog *tmp;
+	struct bpf_binary_header *header = NULL;
+	struct jit_ctx ctx;
+	unsigned int image_size;
+	u8 *image_ptr;
+
+	if (!bpf_jit_enable || !cpu_has_mips64r2)
+		return prog;
+
+	tmp = bpf_jit_blind_constants(prog);
+	/* If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter.
+	 */
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
+	if (ctx.offsets == NULL)
+		goto out_err;
+
+	ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL);
+	if (ctx.reg_val_types == NULL)
+		goto out_err;
+
+	ctx.skf = prog;
+
+	if (reg_val_propagate(&ctx))
+		goto out_err;
+
+	/*
+	 * First pass discovers used resources and instruction offsets
+	 * assuming short branches are used.
+	 */
+	if (build_int_body(&ctx))
+		goto out_err;
+
+	/*
+	 * If no calls are made (EBPF_SAVE_RA), then tail call count
+	 * in $v1, else we must save in n$s4.
+	 */
+	if (ctx.flags & EBPF_SEEN_TC) {
+		if (ctx.flags & EBPF_SAVE_RA)
+			ctx.flags |= EBPF_SAVE_S4;
+		else
+			ctx.flags |= EBPF_TCC_IN_V1;
+	}
+
+	/*
+	 * Second pass generates offsets, if any branches are out of
+	 * range a jump-around long sequence is generated, and we have
+	 * to try again from the beginning to generate the new
+	 * offsets.  This is done until no additional conversions are
+	 * necessary.
+	 */
+	do {
+		ctx.idx = 0;
+		ctx.gen_b_offsets = 1;
+		ctx.long_b_conversion = 0;
+		if (gen_int_prologue(&ctx))
+			goto out_err;
+		if (build_int_body(&ctx))
+			goto out_err;
+		if (build_int_epilogue(&ctx, MIPS_R_RA))
+			goto out_err;
+	} while (ctx.long_b_conversion);
+
+	image_size = 4 * ctx.idx;
+
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL)
+		goto out_err;
+
+	ctx.target = (u32 *)image_ptr;
+
+	/* Third pass generates the code */
+	ctx.idx = 0;
+	if (gen_int_prologue(&ctx))
+		goto out_err;
+	if (build_int_body(&ctx))
+		goto out_err;
+	if (build_int_epilogue(&ctx, MIPS_R_RA))
+		goto out_err;
+
+	/* Update the icache */
+	flush_icache_range((unsigned long)ctx.target,
+			   (unsigned long)(ctx.target + ctx.idx * sizeof(u32)));
+
+	if (bpf_jit_enable > 1)
+		/* Dump JIT code */
+		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
+
+	bpf_jit_binary_lock_ro(header);
+	prog->bpf_func = (void *)ctx.target;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+out_normal:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	kfree(ctx.offsets);
+	kfree(ctx.reg_val_types);
+
+	return prog;
+
+out_err:
+	prog = orig_prog;
+	if (header)
+		bpf_jit_binary_free(header);
+	goto out_normal;
+}

From 5fff41e1f89d93feef9833c49a415dc337af5a99 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 1 Aug 2017 09:41:34 +0300
Subject: [PATCH 149/281] IB/core: Fix race condition in resolving IP to MAC

Currently while resolving IP address to MAC address single delayed work
is used for resolving multiple such resolve requests. This singled work
is essentially performs two tasks.
(a) any retry needed to resolve and
(b) it executes the callback function for all completed requests

While work is executing callbacks, any new work scheduled on for this
workqueue is lost because workqueue has completed looking at all pending
requests and now looking at callbacks, but work is still under
execution. Any further retry to look at pending requests in
process_req() after executing callbacks would lead to similar race
condition (may be reduce the probably further but doesn't eliminate it).
Retrying to enqueue work that from queue_req() context is not something
rest of the kernel modules have followed.

Therefore fix in this patch utilizes kernel facility to enqueue multiple
work items to a workqueue. This ensures that no such requests
gets lost in synchronization. Request list is still maintained so that
rdma_cancel_addr() can unlink the request and get the completion with
error sooner. Neighbour update event handling continues to be handled in
same way as before.
Additionally process_req() work entry cancels any pending work for a
request that gets completed while processing those requests.

Originally ib_addr was ST workqueue, but it became MT work queue with
patch of [1]. This patch again makes it similar to ST so that
neighbour update events handler work item doesn't race with
other work items.

In one such below trace, (though on 4.5 based kernel) it can be seen
that process_req() never executed the callback, which is likely for an
event that was schedule by queue_req() when previous callback was
getting executed by workqueue.

 [<ffffffff816b0dde>] schedule+0x3e/0x90
 [<ffffffff816b3c45>] schedule_timeout+0x1b5/0x210
 [<ffffffff81618c37>] ? ip_route_output_flow+0x27/0x70
 [<ffffffffa027f9c9>] ? addr_resolve+0x149/0x1b0 [ib_addr]
 [<ffffffff816b228f>] wait_for_completion+0x10f/0x170
 [<ffffffff810b6140>] ? try_to_wake_up+0x210/0x210
 [<ffffffffa027f220>] ? rdma_copy_addr+0xa0/0xa0 [ib_addr]
 [<ffffffffa0280120>] rdma_addr_find_l2_eth_by_grh+0x1d0/0x278 [ib_addr]
 [<ffffffff81321297>] ? sub_alloc+0x77/0x1c0
 [<ffffffffa02943b7>] ib_init_ah_from_wc+0x3a7/0x5a0 [ib_core]
 [<ffffffffa0457aba>] cm_req_handler+0xea/0x580 [ib_cm]
 [<ffffffff81015982>] ? __switch_to+0x212/0x5e0
 [<ffffffffa04582fd>] cm_work_handler+0x6d/0x150 [ib_cm]
 [<ffffffff810a14c1>] process_one_work+0x151/0x4b0
 [<ffffffff810a1940>] worker_thread+0x120/0x480
 [<ffffffff816b074b>] ? __schedule+0x30b/0x890
 [<ffffffff810a1820>] ? process_one_work+0x4b0/0x4b0
 [<ffffffff810a1820>] ? process_one_work+0x4b0/0x4b0
 [<ffffffff810a6b1e>] kthread+0xce/0xf0
 [<ffffffff810a6a50>] ? kthread_freezable_should_stop+0x70/0x70
 [<ffffffff816b53a2>] ret_from_fork+0x42/0x70
 [<ffffffff810a6a50>] ? kthread_freezable_should_stop+0x70/0x70
INFO: task kworker/u144:1:156520 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
message.
kworker/u144:1  D ffff883ffe1d7600     0 156520      2 0x00000080
Workqueue: ib_addr process_req [ib_addr]
 ffff883f446fbbd8 0000000000000046 ffff881f95280000 ffff881ff24de200
 ffff883f66120000 ffff883f446f8008 ffff881f95280000 ffff883f6f9208c4
 ffff883f6f9208c8 00000000ffffffff ffff883f446fbbf8 ffffffff816b0dde

[1] http://lkml.iu.edu/hypermail/linux/kernel/1608.1/05834.html

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/addr.c | 62 ++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 01236cef7bfb..437522ca97b4 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,6 +61,7 @@ struct addr_req {
 	void (*callback)(int status, struct sockaddr *src_addr,
 			 struct rdma_dev_addr *addr, void *context);
 	unsigned long timeout;
+	struct delayed_work work;
 	int status;
 	u32 seq;
 };
@@ -295,7 +296,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
 }
 EXPORT_SYMBOL(rdma_translate_ip);
 
-static void set_timeout(unsigned long time)
+static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
 {
 	unsigned long delay;
 
@@ -303,7 +304,7 @@ static void set_timeout(unsigned long time)
 	if ((long)delay < 0)
 		delay = 0;
 
-	mod_delayed_work(addr_wq, &work, delay);
+	mod_delayed_work(addr_wq, delayed_work, delay);
 }
 
 static void queue_req(struct addr_req *req)
@@ -318,8 +319,7 @@ static void queue_req(struct addr_req *req)
 
 	list_add(&req->list, &temp_req->list);
 
-	if (req_list.next == &req->list)
-		set_timeout(req->timeout);
+	set_timeout(&req->work, req->timeout);
 	mutex_unlock(&lock);
 }
 
@@ -574,6 +574,37 @@ static int addr_resolve(struct sockaddr *src_in,
 	return ret;
 }
 
+static void process_one_req(struct work_struct *_work)
+{
+	struct addr_req *req;
+	struct sockaddr *src_in, *dst_in;
+
+	mutex_lock(&lock);
+	req = container_of(_work, struct addr_req, work.work);
+
+	if (req->status == -ENODATA) {
+		src_in = (struct sockaddr *)&req->src_addr;
+		dst_in = (struct sockaddr *)&req->dst_addr;
+		req->status = addr_resolve(src_in, dst_in, req->addr,
+					   true, req->seq);
+		if (req->status && time_after_eq(jiffies, req->timeout)) {
+			req->status = -ETIMEDOUT;
+		} else if (req->status == -ENODATA) {
+			/* requeue the work for retrying again */
+			set_timeout(&req->work, req->timeout);
+			mutex_unlock(&lock);
+			return;
+		}
+	}
+	list_del(&req->list);
+	mutex_unlock(&lock);
+
+	req->callback(req->status, (struct sockaddr *)&req->src_addr,
+		req->addr, req->context);
+	put_client(req->client);
+	kfree(req);
+}
+
 static void process_req(struct work_struct *work)
 {
 	struct addr_req *req, *temp_req;
@@ -591,20 +622,23 @@ static void process_req(struct work_struct *work)
 						   true, req->seq);
 			if (req->status && time_after_eq(jiffies, req->timeout))
 				req->status = -ETIMEDOUT;
-			else if (req->status == -ENODATA)
+			else if (req->status == -ENODATA) {
+				set_timeout(&req->work, req->timeout);
 				continue;
+			}
 		}
 		list_move_tail(&req->list, &done_list);
 	}
 
-	if (!list_empty(&req_list)) {
-		req = list_entry(req_list.next, struct addr_req, list);
-		set_timeout(req->timeout);
-	}
 	mutex_unlock(&lock);
 
 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
 		list_del(&req->list);
+		/* It is safe to cancel other work items from this work item
+		 * because at a time there can be only one work item running
+		 * with this single threaded work queue.
+		 */
+		cancel_delayed_work(&req->work);
 		req->callback(req->status, (struct sockaddr *) &req->src_addr,
 			req->addr, req->context);
 		put_client(req->client);
@@ -647,6 +681,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 	req->context = context;
 	req->client = client;
 	atomic_inc(&client->refcount);
+	INIT_DELAYED_WORK(&req->work, process_one_req);
 	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 
 	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
@@ -701,7 +736,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 			req->status = -ECANCELED;
 			req->timeout = jiffies;
 			list_move(&req->list, &req_list);
-			set_timeout(req->timeout);
+			set_timeout(&req->work, req->timeout);
 			break;
 		}
 	}
@@ -807,9 +842,8 @@ static int netevent_callback(struct notifier_block *self, unsigned long event,
 	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;
 
-		if (neigh->nud_state & NUD_VALID) {
-			set_timeout(jiffies);
-		}
+		if (neigh->nud_state & NUD_VALID)
+			set_timeout(&work, jiffies);
 	}
 	return 0;
 }
@@ -820,7 +854,7 @@ static struct notifier_block nb = {
 
 int addr_init(void)
 {
-	addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
+	addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM);
 	if (!addr_wq)
 		return -ENOMEM;
 

From f7a6cb7b38c6845b26aaa8bbdf519ff6e3090831 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 1 Aug 2017 09:41:35 +0300
Subject: [PATCH 150/281] RDMA/uverbs: Prevent leak of reserved field

initialize to zero the response structure to prevent
the leakage of "resp.reserved" field.

drivers/infiniband/core/uverbs_cmd.c:1178 ib_uverbs_resize_cq() warn:
	check that 'resp.reserved' doesn't leak information

Fixes: 33b9b3ee9709 ("IB: Add userspace support for resizing CQs")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2c98533a0203..c551d2b275fd 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1153,7 +1153,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 			    int out_len)
 {
 	struct ib_uverbs_resize_cq	cmd;
-	struct ib_uverbs_resize_cq_resp	resp;
+	struct ib_uverbs_resize_cq_resp	resp = {};
 	struct ib_udata                 udata;
 	struct ib_cq			*cq;
 	int				ret = -EINVAL;

From efdd6f53b10aead0f5cf19a93dd3eb268ac0d991 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 1 Aug 2017 09:41:36 +0300
Subject: [PATCH 151/281] IB/uverbs: Fix device cleanup

Uverbs device should be cleaned up only when there is no
potential usage of.

As part of ib_uverbs_remove_one which might be triggered upon reset flow
the device reference count is decreased as expected and leave the final
cleanup to the FDs that were opened.

Current code increases reference count upon opening a new command FD and
decreases it upon closing the file. The event FD is opened internally
and rely on the command FD by taking on it a reference count.

In case that the command FD was closed and just later the event FD we
may ensure that the device resources as of srcu are still alive as they
are still in use.

Fixing the above by moving the reference count decreasing to the place
where the command FD is really freed instead of doing that when it was
just closed.

fixes: 036b10635739 ("IB/uverbs: Enable device removal when there are active user space applications")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Tested-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/uverbs_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 3d2609608f58..c023e2c81b8f 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -250,6 +250,7 @@ void ib_uverbs_release_file(struct kref *ref)
 	if (atomic_dec_and_test(&file->device->refcount))
 		ib_uverbs_comp_dev(file->device);
 
+	kobject_put(&file->device->kobj);
 	kfree(file);
 }
 
@@ -917,7 +918,6 @@ err:
 static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_file *file = filp->private_data;
-	struct ib_uverbs_device *dev = file->device;
 
 	mutex_lock(&file->cleanup_mutex);
 	if (file->ucontext) {
@@ -939,7 +939,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 			 ib_uverbs_release_async_event_file);
 
 	kref_put(&file->ref, ib_uverbs_release_file);
-	kobject_put(&dev->kobj);
 
 	return 0;
 }

From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 1 Aug 2017 09:41:37 +0300
Subject: [PATCH 152/281] RDMA/mlx5: Fix existence check for extended address
 vector

The extended address vector is the highest bit in be32 variable,
but it was compared with the lowest. This patch fixes the endianness
of that check and removes already declared define.

Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support")
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 2 +-
 include/linux/mlx5/qp.h          | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index ae0746754008..3d701c7a4c91 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
 	if (qp->ibqp.qp_type != IB_QPT_RC) {
 		av = *wqe;
-		if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
 			*wqe += sizeof(struct mlx5_av);
 		else
 			*wqe += sizeof(struct mlx5_base_av);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 6f41270d80c0..f378dc0e7eaf 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg {
 #define MLX5_WQE_CTRL_OPCODE_MASK 0xff
 #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
-#define MLX5_WQE_AV_EXT 0x80000000
 
 enum {
 	MLX5_ETH_WQE_L3_INNER_CSUM      = 1 << 4,

From aaf83aecd294fd61db6b34051f718f3e7ea34c22 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Date: Thu, 3 Aug 2017 15:43:14 +0200
Subject: [PATCH 153/281] xgene: Always get clk source, but ignore if it's
 missing for SGMII ports

Even the driver doesn't do anything with the clk source for SGMII
ports it needs to be enabled by doing a devm_clk_get(), if there is
a clk source in DT.

Fixes: 0db01097cabd ('xgene: Don't fail probe, if there is no clk resource for SGMII interfaces')
Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Tested-by: Laura Abbott <labbott@redhat.com>
Acked-by: Iyappan Subramanian <isubramanian@apm.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 86058a9f3417..1d307f2def2d 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1785,9 +1785,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 
 	xgene_enet_gpiod_get(pdata);
 
-	if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) {
-		pdata->clk = devm_clk_get(&pdev->dev, NULL);
-		if (IS_ERR(pdata->clk)) {
+	pdata->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pdata->clk)) {
+		if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) {
 			/* Abort if the clock is defined but couldn't be
 			 * retrived. Always abort if the clock is missing on
 			 * DT system as the driver can't cope with this case.

From 5db465f235e74293e285e1fa924a55e52ba52a98 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 4 Aug 2017 11:12:08 +0300
Subject: [PATCH 154/281] IB/hns: checking for IS_ERR() instead of NULL

The hns_roce_v1_create_lp_qp() returns NULL on error, not error pointers.

Fixes: bfcc681bd09d ("IB/hns: Fix the bug when free mr")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 23fad6d96944..2540b65e242c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -733,7 +733,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 			continue;
 
 		free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
-		if (IS_ERR(free_mr->mr_free_qp[i])) {
+		if (!free_mr->mr_free_qp[i]) {
 			dev_err(dev, "Create loop qp failed!\n");
 			goto create_lp_qp_failed;
 		}

From ea7bd56fa309d10a41b1041827a63c0746c47554 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 2 Aug 2017 12:37:16 -0700
Subject: [PATCH 155/281] xfs: Fix leak of discard bio

The bio describing discard operation is allocated by
__blkdev_issue_discard() which returns us a reference to it. That
reference is never released and thus we leak this bio. Drop the bio
reference once it completes in xlog_discard_endio().

CC: stable@vger.kernel.org
Fixes: 4560e78f40cb55bd2ea8f1ef4001c5baa88531c7
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log_cil.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index fbe72b134bef..43aa42a3a5d3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -539,6 +539,7 @@ xlog_discard_endio(
 
 	INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work);
 	queue_work(xfs_discard_wq, &ctx->discard_endio_work);
+	bio_put(bio);
 }
 
 static void

From 56bdf855e676f1f2ed7033f288f57dfd315725ba Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Thu, 3 Aug 2017 13:19:13 -0700
Subject: [PATCH 156/281] xfs: Fix per-inode DAX flag inheritance

According to the commit that implemented per-inode DAX flag:
commit 58f88ca2df72 ("xfs: introduce per-inode DAX enablement")
the flag is supposed to act as "inherit flag".

Currently this only works in the situations where parent directory
already has a flag in di_flags set, otherwise inheritance does not
work. This is because setting the XFS_DIFLAG2_DAX flag is done in a
wrong branch designated for di_flags, not di_flags2.

Fix this by moving the code to branch designated for setting di_flags2,
which does test for flags in di_flags2.

Fixes: 58f88ca2df72 ("xfs: introduce per-inode DAX enablement")
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_inode.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ceef77c0416a..ff48f0096810 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -874,7 +874,6 @@ xfs_ialloc(
 	case S_IFREG:
 	case S_IFDIR:
 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-			uint64_t	di_flags2 = 0;
 			uint		di_flags = 0;
 
 			if (S_ISDIR(mode)) {
@@ -911,20 +910,23 @@ xfs_ialloc(
 				di_flags |= XFS_DIFLAG_NODEFRAG;
 			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
 				di_flags |= XFS_DIFLAG_FILESTREAM;
-			if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
-				di_flags2 |= XFS_DIFLAG2_DAX;
 
 			ip->i_d.di_flags |= di_flags;
-			ip->i_d.di_flags2 |= di_flags2;
 		}
 		if (pip &&
 		    (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) &&
 		    pip->i_d.di_version == 3 &&
 		    ip->i_d.di_version == 3) {
+			uint64_t	di_flags2 = 0;
+
 			if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
-				ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+				di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 				ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
 			}
+			if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+				di_flags2 |= XFS_DIFLAG2_DAX;
+
+			ip->i_d.di_flags2 |= di_flags2;
 		}
 		/* FALLTHROUGH */
 	case S_IFLNK:

From 2c460621bb2e6baf8a475c407cdb29029b2497ac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 4 Aug 2017 22:24:41 +0200
Subject: [PATCH 157/281] bpf: fix byte order test in test_verifier

We really must check with #if __BYTE_ORDER == XYZ instead of
just presence of #ifdef __LITTLE_ENDIAN. I noticed that when
actually running this on big endian machine, the latter test
resolves to true for user space, same for #ifdef __BIG_ENDIAN.

E.g., looking at endian.h from libc, both are also defined
there, so we really must test this against __BYTE_ORDER instead
for proper insns selection. For the kernel, such checks are
fine though e.g. see 13da9e200fe4 ("Revert "endian: #define
__BYTE_ORDER"") and 415586c9e6d3 ("UAPI: fix endianness conditionals
in M32R's asm/stat.h") for some more context, but not for
user space. Lets also make sure to properly include endian.h.
After that, suite passes for me:

./test_verifier: ELF 64-bit MSB executable, [...]

Linux foo 4.13.0-rc3+ #4 SMP Fri Aug 4 06:59:30 EDT 2017 s390x s390x s390x GNU/Linux

Before fix: Summary: 505 PASSED, 11 FAILED
After  fix: Summary: 516 PASSED,  0 FAILED

Fixes: 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index addea82f76c9..d3ed7324105e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,6 +8,7 @@
  * License as published by the Free Software Foundation.
  */
 
+#include <endian.h>
 #include <asm/types.h>
 #include <linux/types.h>
 #include <stdint.h>
@@ -1098,7 +1099,7 @@ static struct bpf_test tests[] = {
 		"check skb->hash byte load permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash)),
 #else
@@ -1135,7 +1136,7 @@ static struct bpf_test tests[] = {
 		"check skb->hash byte load not permitted 3",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash) + 3),
 #else
@@ -1244,7 +1245,7 @@ static struct bpf_test tests[] = {
 		"check skb->hash half load permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash)),
 #else
@@ -1259,7 +1260,7 @@ static struct bpf_test tests[] = {
 		"check skb->hash half load not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, hash) + 2),
 #else
@@ -5422,7 +5423,7 @@ static struct bpf_test tests[] = {
 		"check bpf_perf_event_data->sample_period byte load permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
@@ -5438,7 +5439,7 @@ static struct bpf_test tests[] = {
 		"check bpf_perf_event_data->sample_period half load permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
@@ -5454,7 +5455,7 @@ static struct bpf_test tests[] = {
 		"check bpf_perf_event_data->sample_period word load permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
@@ -5481,7 +5482,7 @@ static struct bpf_test tests[] = {
 		"check skb->data half load not permitted",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
 #else
@@ -5497,7 +5498,7 @@ static struct bpf_test tests[] = {
 		"check skb->tc_classid half load not permitted for lwt prog",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-#ifdef __LITTLE_ENDIAN
+#if __BYTE_ORDER == __LITTLE_ENDIAN
 			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, tc_classid)),
 #else

From 978d13d60c34818a41fc35962602bdfa5c03f214 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 4 Aug 2017 23:59:31 -0700
Subject: [PATCH 158/281] iscsi-target: Fix iscsi_np reset hung task during
 parallel delete

This patch fixes a bug associated with iscsit_reset_np_thread()
that can occur during parallel configfs rmdir of a single iscsi_np
used across multiple iscsi-target instances, that would result in
hung task(s) similar to below where configfs rmdir process context
was blocked indefinately waiting for iscsi_np->np_restart_comp
to finish:

[ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds.
[ 6726.119440]       Tainted: G        W  O     4.1.26-3321 #2
[ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88     0 15550      1 0x00000000
[ 6726.140058]  ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08
[ 6726.147593]  ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0
[ 6726.155132]  ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8
[ 6726.162667] Call Trace:
[ 6726.165150]  [<ffffffff8168ced2>] schedule+0x32/0x80
[ 6726.170156]  [<ffffffff8168f5b4>] schedule_timeout+0x214/0x290
[ 6726.176030]  [<ffffffff810caef2>] ? __send_signal+0x52/0x4a0
[ 6726.181728]  [<ffffffff8168d7d6>] wait_for_completion+0x96/0x100
[ 6726.187774]  [<ffffffff810e7c80>] ? wake_up_state+0x10/0x10
[ 6726.193395]  [<ffffffffa035d6e2>] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod]
[ 6726.201278]  [<ffffffffa0355d86>] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod]
[ 6726.210033]  [<ffffffffa0363f7f>] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod]
[ 6726.218351]  [<ffffffff81260c5a>] configfs_write_file+0xaa/0x110
[ 6726.224392]  [<ffffffff811ea364>] vfs_write+0xa4/0x1b0
[ 6726.229576]  [<ffffffff811eb111>] SyS_write+0x41/0xb0
[ 6726.234659]  [<ffffffff8169042e>] system_call_fastpath+0x12/0x71

It would happen because each iscsit_reset_np_thread() sets state
to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting
for completion on iscsi_np->np_restart_comp.

However, if iscsi_np was active processing a login request and
more than a single iscsit_reset_np_thread() caller to the same
iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np
kthread process context in __iscsi_target_login_thread() would
flush pending signals and only perform a single completion of
np->np_restart_comp before going back to sleep within transport
specific iscsit_transport->iscsi_accept_np code.

To address this bug, add a iscsi_np->np_reset_count and update
__iscsi_target_login_thread() to keep completing np->np_restart_comp
until ->np_reset_count has reached zero.

Reported-by: Gary Guo <ghg@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c       | 1 +
 drivers/target/iscsi/iscsi_target_login.c | 7 +++++--
 include/target/iscsi/iscsi_target_core.h  | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 12803de99400..5001261f5d69 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -418,6 +418,7 @@ int iscsit_reset_np_thread(
 		return 0;
 	}
 	np->np_thread_state = ISCSI_NP_THREAD_RESET;
+	atomic_inc(&np->np_reset_count);
 
 	if (np->np_thread) {
 		spin_unlock_bh(&np->np_thread_lock);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index e9bdc8b86e7d..dc13afbd4c88 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1243,9 +1243,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 	flush_signals(current);
 
 	spin_lock_bh(&np->np_thread_lock);
-	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+	if (atomic_dec_if_positive(&np->np_reset_count) >= 0) {
 		np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+		spin_unlock_bh(&np->np_thread_lock);
 		complete(&np->np_restart_comp);
+		return 1;
 	} else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) {
 		spin_unlock_bh(&np->np_thread_lock);
 		goto exit;
@@ -1278,7 +1280,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 		goto exit;
 	} else if (rc < 0) {
 		spin_lock_bh(&np->np_thread_lock);
-		if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+		if (atomic_dec_if_positive(&np->np_reset_count) >= 0) {
+			np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
 			spin_unlock_bh(&np->np_thread_lock);
 			complete(&np->np_restart_comp);
 			iscsit_put_transport(conn->conn_transport);
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 0ca1fb08805b..fb87d32f5e51 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -786,6 +786,7 @@ struct iscsi_np {
 	int			np_sock_type;
 	enum np_thread_state_table np_thread_state;
 	bool                    enabled;
+	atomic_t		np_reset_count;
 	enum iscsi_timer_flags_table np_login_timer_flags;
 	u32			np_exports;
 	enum np_flags_table	np_flags;

From 732e49850c5e15231e11a0a464748bcbade5e3c2 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Thu, 3 Aug 2017 17:13:54 -0700
Subject: [PATCH 159/281] netvsc: fix race on sub channel creation

The existing sub channel code did not wait for all the sub-channels
to completely initialize. This could lead to race causing crash
in napi_netif_del() from bad list. The existing code would send
an init message, then wait only for the initial response that
the init message was received. It thought it was waiting for
sub channels but really the init response did the wakeup.

The new code keeps track of the number of open channels and
waits until that many are open.

Other issues here were:
  * host might return less sub-channels than was requested.
  * the new init status is not valid until after init was completed.

Fixes: b3e6b82a0099 ("hv_netvsc: Wait for sub-channels to be processed during probe")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h   |  3 ++-
 drivers/net/hyperv/netvsc.c       |  1 +
 drivers/net/hyperv/rndis_filter.c | 14 ++++++++------
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d6c25580f8dd..12cc64bfcff8 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -765,7 +765,8 @@ struct netvsc_device {
 	u32 max_chn;
 	u32 num_chn;
 
-	refcount_t sc_offered;
+	atomic_t open_chn;
+	wait_queue_head_t subchan_open;
 
 	struct rndis_device *extension;
 
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 96f90c75d1b7..d18c3326a1f7 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -78,6 +78,7 @@ static struct netvsc_device *alloc_net_device(void)
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 	init_completion(&net_device->channel_init_wait);
+	init_waitqueue_head(&net_device->subchan_open);
 
 	return net_device;
 }
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 85c00e1c52b6..d6308ffda53e 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1048,8 +1048,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	else
 		netif_napi_del(&nvchan->napi);
 
-	if (refcount_dec_and_test(&nvscdev->sc_offered))
-		complete(&nvscdev->channel_init_wait);
+	atomic_inc(&nvscdev->open_chn);
+	wake_up(&nvscdev->subchan_open);
 }
 
 int rndis_filter_device_add(struct hv_device *dev,
@@ -1090,8 +1090,6 @@ int rndis_filter_device_add(struct hv_device *dev,
 	net_device->max_chn = 1;
 	net_device->num_chn = 1;
 
-	refcount_set(&net_device->sc_offered, 0);
-
 	net_device->extension = rndis_device;
 	rndis_device->ndev = net;
 
@@ -1221,11 +1219,11 @@ int rndis_filter_device_add(struct hv_device *dev,
 		rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
 							net_device->num_chn);
 
+	atomic_set(&net_device->open_chn, 1);
 	num_rss_qs = net_device->num_chn - 1;
 	if (num_rss_qs == 0)
 		return 0;
 
-	refcount_set(&net_device->sc_offered, num_rss_qs);
 	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
 
 	init_packet = &net_device->channel_init_pkt;
@@ -1242,15 +1240,19 @@ int rndis_filter_device_add(struct hv_device *dev,
 	if (ret)
 		goto out;
 
+	wait_for_completion(&net_device->channel_init_wait);
 	if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
 		ret = -ENODEV;
 		goto out;
 	}
-	wait_for_completion(&net_device->channel_init_wait);
 
 	net_device->num_chn = 1 +
 		init_packet->msg.v5_msg.subchn_comp.num_subchannels;
 
+	/* wait for all sub channels to open */
+	wait_event(net_device->subchan_open,
+		   atomic_read(&net_device->open_chn) == net_device->num_chn);
+
 	/* ignore failues from setting rss parameters, still have channels */
 	rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
 				   net_device->num_chn);

From d6086598d34e1cf9091c7be201f5b2041dc6203e Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Wed, 2 Aug 2017 10:31:01 +0800
Subject: [PATCH 160/281] drm/i915/gvt: Change the max length of mmio_reg_rw
 from 4 to 8

When linux guest access mmio with __raw_i915_read64 or __raw_i915_write64,
its length is 8 bytes.

This fix the linux guest in xengt couldn't boot up as it fail in
reading pv_info->magic.

Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks")
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 323664a238f5..feed9921b3b3 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3028,7 +3028,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
 	gvt_mmio_func func;
 	int ret;
 
-	if (WARN_ON(bytes > 4))
+	if (WARN_ON(bytes > 8))
 		return -EINVAL;
 
 	/*

From 5279fc7724ae3a82c9cfe5b09c1fb07ff0e41056 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 24 Jul 2017 11:14:31 +0200
Subject: [PATCH 161/281] drm/i915: Fix out-of-bounds array access in
 bdw_load_gamma_lut

bdw_load_gamma_lut is writing beyond the array to the maximum value.
The intend of the function is to clamp values > 1 to 1, so write
the intended color to the max register.

This fixes the following KASAN warning:

[  197.020857] [IGT] kms_pipe_color: executing
[  197.063434] [IGT] kms_pipe_color: starting subtest ctm-0-25-pipe0
[  197.078989] ==================================================================
[  197.079127] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079188] Read of size 2 at addr ffff8800d38db150 by task kms_pipe_color/1839
[  197.079208] CPU: 2 PID: 1839 Comm: kms_pipe_color Tainted: G     U 4.13.0-rc1-patser+ #5211
[  197.079215] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015
[  197.079220] Call Trace:
[  197.079230]  dump_stack+0x68/0x9e
[  197.079239]  print_address_description+0x6f/0x250
[  197.079251]  kasan_report+0x216/0x370
[  197.079374]  ? bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079451]  ? gen8_write16+0x4e0/0x4e0 [i915]
[  197.079460]  __asan_report_load2_noabort+0x14/0x20
[  197.079535]  bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915]
[  197.079612]  broadwell_load_luts+0x1df/0x550 [i915]
[  197.079690]  intel_color_load_luts+0x7b/0x80 [i915]
[  197.079764]  intel_begin_crtc_commit+0x138/0x760 [i915]
[  197.079783]  drm_atomic_helper_commit_planes_on_crtc+0x1a3/0x820 [drm_kms_helper]
[  197.079859]  ? intel_pre_plane_update+0x571/0x580 [i915]
[  197.079937]  intel_update_crtc+0x238/0x330 [i915]
[  197.080016]  intel_update_crtcs+0x10f/0x210 [i915]
[  197.080092]  intel_atomic_commit_tail+0x1552/0x3340 [i915]
[  197.080101]  ? _raw_spin_unlock+0x3c/0x40
[  197.080110]  ? __queue_work+0xb40/0xbf0
[  197.080188]  ? skl_update_crtcs+0xc00/0xc00 [i915]
[  197.080195]  ? trace_hardirqs_on+0xd/0x10
[  197.080269]  ? intel_atomic_commit_ready+0x128/0x13c [i915]
[  197.080329]  ? __i915_sw_fence_complete+0x5b8/0x6d0 [i915]
[  197.080336]  ? debug_object_activate+0x39e/0x580
[  197.080397]  ? i915_sw_fence_await+0x30/0x30 [i915]
[  197.080409]  ? __might_sleep+0x15b/0x180
[  197.080483]  intel_atomic_commit+0x944/0xa70 [i915]
[  197.080490]  ? refcount_dec_and_test+0x11/0x20
[  197.080567]  ? intel_atomic_commit_tail+0x3340/0x3340 [i915]
[  197.080597]  ? drm_atomic_crtc_set_property+0x303/0x580 [drm]
[  197.080674]  ? intel_atomic_commit_tail+0x3340/0x3340 [i915]
[  197.080704]  drm_atomic_commit+0xd7/0xe0 [drm]
[  197.080722]  drm_atomic_helper_crtc_set_property+0xec/0x130 [drm_kms_helper]
[  197.080749]  drm_mode_crtc_set_obj_prop+0x7d/0xb0 [drm]
[  197.080775]  drm_mode_obj_set_property_ioctl+0x50b/0x5d0 [drm]
[  197.080783]  ? __might_fault+0x104/0x180
[  197.080809]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080838]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080861]  drm_ioctl_kernel+0x154/0x1a0 [drm]
[  197.080885]  drm_ioctl+0x624/0x8f0 [drm]
[  197.080910]  ? drm_mode_obj_find_prop_id+0x160/0x160 [drm]
[  197.080934]  ? drm_getunique+0x210/0x210 [drm]
[  197.080943]  ? __handle_mm_fault+0x1bd0/0x1ce0
[  197.080949]  ? lock_downgrade+0x610/0x610
[  197.080957]  ? __lru_cache_add+0x15a/0x180
[  197.080967]  do_vfs_ioctl+0xd92/0xe40
[  197.080975]  ? ioctl_preallocate+0x1b0/0x1b0
[  197.080982]  ? selinux_capable+0x20/0x20
[  197.080991]  ? __do_page_fault+0x7b7/0x9a0
[  197.080997]  ? lock_downgrade+0x5bb/0x610
[  197.081007]  ? security_file_ioctl+0x57/0x90
[  197.081016]  SyS_ioctl+0x4e/0x80
[  197.081024]  entry_SYSCALL_64_fastpath+0x18/0xad
[  197.081030] RIP: 0033:0x7f61f287a987
[  197.081035] RSP: 002b:00007fff7d44d188 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  197.081043] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f61f287a987
[  197.081048] RDX: 00007fff7d44d1c0 RSI: 00000000c01864ba RDI: 0000000000000003
[  197.081053] RBP: 00007f61f2b3eb00 R08: 0000000000000059 R09: 0000000000000000
[  197.081058] R10: 0000002ea5c4a290 R11: 0000000000000246 R12: 00007f61f2b3eb58
[  197.081063] R13: 0000000000001010 R14: 00007f61f2b3eb58 R15: 0000000000002702

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101659
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reported-by: Martin Peres <martin.peres@linux.intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl")
Cc: Shashank Sharma <shashank.sharma@intel.com>
Cc: Kiran S Kumar <kiran.s.kumar@intel.com>
Cc: Kausal Malladi <kausalmalladi@gmail.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v4.7+
Link: https://patchwork.freedesktop.org/patch/msgid/20170724091431.24251-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
(cherry picked from commit 09a92bc8773b4314e02b478e003fe5936ce85adb)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_color.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index 306c6b06b330..17c4ae7e4e7c 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -398,6 +398,7 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset)
 		}
 
 		/* Program the max register to clamp values > 1.0. */
+		i = lut_size - 1;
 		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
 			   drm_color_lut_extract(lut[i].red, 16));
 		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),

From 00e06297b351d286199c6cf542ea70b8fbabafee Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 3 Aug 2017 21:16:15 +0200
Subject: [PATCH 162/281] MIPS: mm: remove duplicate "const" qualifier on
 insn_table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following gcc 7.x build error:

arch/mips/mm/uasm-mips.c:51:26: error: duplicate ‘const’ declaration specifier [-Werror=duplicate-decl-specifier]
 static const struct insn const insn_table[insn_invalid] = {

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Fixes: ce807d5f67ed ("MIPS: Optimize uasm insn lookup.")
Cc: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16926/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/uasm-mips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 3f74f6c1f065..9fea6c6bbf49 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -48,7 +48,7 @@
 
 #include "uasm.c"
 
-static const struct insn const insn_table[insn_invalid] = {
+static const struct insn insn_table[insn_invalid] = {
 	[insn_addiu]	= {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
 	[insn_addu]	= {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD},
 	[insn_and]	= {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD},

From 6f542ebeaee0ee552a902ce3892220fc22c7ec8e Mon Sep 17 00:00:00 2001
From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
Date: Thu, 3 Aug 2017 08:20:22 +0200
Subject: [PATCH 163/281] MIPS: Fix race on setting and getting cpu_online_mask

While testing cpu hoptlug (cpu down and up in loops) on kernel 4.4, it was
observed that occasionally check for cpu online will fail in kernel/cpu.c,
_cpu_up:

https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/kernel/cpu.c?h=v4.4.79#n485
 518        /* Arch-specific enabling code. */
 519        ret = __cpu_up(cpu, idle);
 520
 521        if (ret != 0)
 522                goto out_notify;
 523        BUG_ON(!cpu_online(cpu));

Reason is race between start_secondary and _cpu_up. cpu_callin_map is set
before cpu_online_mask. In __cpu_up, cpu_callin_map is waited for, but cpu
online mask is not, resulting in race in which secondary processor started
and set cpu_callin_map, but not yet set the online mask,resulting in above
BUG being hit.

Upstream differs in the area. cpu_online check is in bringup_wait_for_ap,
which is after cpu reached AP_ONLINE_IDLE,where secondary passed its start
function. Nonetheless, fix makes start_secondary safe and not depending on
other locks throughout the code. It protects as well against cpu_online
checks put in between sometimes in the future.

Fix this by moving completion after all flags are set.

Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
Cc: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16925/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 770d4d1516cb..6bace7695788 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -376,9 +376,6 @@ asmlinkage void start_secondary(void)
 	cpumask_set_cpu(cpu, &cpu_coherent_mask);
 	notify_cpu_starting(cpu);
 
-	complete(&cpu_running);
-	synchronise_count_slave(cpu);
-
 	set_cpu_online(cpu, true);
 
 	set_cpu_sibling_map(cpu);
@@ -386,6 +383,9 @@ asmlinkage void start_secondary(void)
 
 	calculate_cpu_foreign_map();
 
+	complete(&cpu_running);
+	synchronise_count_slave(cpu);
+
 	/*
 	 * irq will be enabled in ->smp_finish(), enabling it too early
 	 * is dangerous.

From 33a73649acc412848996ae0921d577f33eb54af5 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Wed, 2 Aug 2017 17:04:04 +0200
Subject: [PATCH 164/281] MIPS: gitignore: ignore generated .c files

Add ashldi3.c and bswapsi.c to the list of ignored files.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16905/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/compressed/.gitignore | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 arch/mips/boot/compressed/.gitignore

diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore
new file mode 100644
index 000000000000..ebae133f1d00
--- /dev/null
+++ b/arch/mips/boot/compressed/.gitignore
@@ -0,0 +1,2 @@
+ashldi3.c
+bswapsi.c

From 81a67e52763d1db6b3200c648d1efa16daddc536 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <Steven.Hill@cavium.com>
Date: Wed, 2 Aug 2017 12:39:28 -0500
Subject: [PATCH 165/281] MIPS: Octeon: Fix broken EDAC driver.

Commit "MIPS: Octeon: Remove unused L2C types and macros." broke the
the EDAC driver. Bring back 'cvmx-l2d-defs.h' file and the missing
types for L2C. Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C
types and macros.")

Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C types and macros.")
Signed-off-by: Steven J. Hill <steven.hill@cavium.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.12+
Patchwork: https://patchwork.linux-mips.org/patch/16906/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/octeon/cvmx-l2c-defs.h | 37 +++++++++++-
 arch/mips/include/asm/octeon/cvmx-l2d-defs.h | 60 ++++++++++++++++++++
 arch/mips/include/asm/octeon/cvmx.h          |  1 +
 3 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/include/asm/octeon/cvmx-l2d-defs.h

diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
index d045973ddb33..3ea84acf1814 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
@@ -33,6 +33,10 @@
 #define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
 #define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull))
 #define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull))
+#define CVMX_L2C_ERR_TDTX(block_id)					       \
+	(CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_ERR_TTGX(block_id)					       \
+	(CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull)
 #define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull))
 #define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull))
 #define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
@@ -66,9 +70,40 @@
 		((offset) & 1) * 8)
 #define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull)    + \
 		((offset) & 31) * 8)
-#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
 
 
+union cvmx_l2c_err_tdtx {
+	uint64_t u64;
+	struct cvmx_l2c_err_tdtx_s {
+		__BITFIELD_FIELD(uint64_t dbe:1,
+		__BITFIELD_FIELD(uint64_t sbe:1,
+		__BITFIELD_FIELD(uint64_t vdbe:1,
+		__BITFIELD_FIELD(uint64_t vsbe:1,
+		__BITFIELD_FIELD(uint64_t syn:10,
+		__BITFIELD_FIELD(uint64_t reserved_22_49:28,
+		__BITFIELD_FIELD(uint64_t wayidx:18,
+		__BITFIELD_FIELD(uint64_t reserved_2_3:2,
+		__BITFIELD_FIELD(uint64_t type:2,
+		;)))))))))
+	} s;
+};
+
+union cvmx_l2c_err_ttgx {
+	uint64_t u64;
+	struct cvmx_l2c_err_ttgx_s {
+		__BITFIELD_FIELD(uint64_t dbe:1,
+		__BITFIELD_FIELD(uint64_t sbe:1,
+		__BITFIELD_FIELD(uint64_t noway:1,
+		__BITFIELD_FIELD(uint64_t reserved_56_60:5,
+		__BITFIELD_FIELD(uint64_t syn:6,
+		__BITFIELD_FIELD(uint64_t reserved_22_49:28,
+		__BITFIELD_FIELD(uint64_t wayidx:15,
+		__BITFIELD_FIELD(uint64_t reserved_2_6:5,
+		__BITFIELD_FIELD(uint64_t type:2,
+		;)))))))))
+	} s;
+};
+
 union cvmx_l2c_cfg {
 	uint64_t u64;
 	struct cvmx_l2c_cfg_s {
diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
new file mode 100644
index 000000000000..a951ad5d65ad
--- /dev/null
+++ b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
@@ -0,0 +1,60 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2017 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+#ifndef __CVMX_L2D_DEFS_H__
+#define __CVMX_L2D_DEFS_H__
+
+#define CVMX_L2D_ERR	(CVMX_ADD_IO_SEG(0x0001180080000010ull))
+#define CVMX_L2D_FUS3	(CVMX_ADD_IO_SEG(0x00011800800007B8ull))
+
+
+union cvmx_l2d_err {
+	uint64_t u64;
+	struct cvmx_l2d_err_s {
+		__BITFIELD_FIELD(uint64_t reserved_6_63:58,
+		__BITFIELD_FIELD(uint64_t bmhclsel:1,
+		__BITFIELD_FIELD(uint64_t ded_err:1,
+		__BITFIELD_FIELD(uint64_t sec_err:1,
+		__BITFIELD_FIELD(uint64_t ded_intena:1,
+		__BITFIELD_FIELD(uint64_t sec_intena:1,
+		__BITFIELD_FIELD(uint64_t ecc_ena:1,
+		;)))))))
+	} s;
+};
+
+union cvmx_l2d_fus3 {
+	uint64_t u64;
+	struct cvmx_l2d_fus3_s {
+		__BITFIELD_FIELD(uint64_t reserved_40_63:24,
+		__BITFIELD_FIELD(uint64_t ema_ctl:3,
+		__BITFIELD_FIELD(uint64_t reserved_34_36:3,
+		__BITFIELD_FIELD(uint64_t q3fus:34,
+		;))))
+	} s;
+};
+
+#endif
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 9742202f2a32..e638735cc3ac 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -62,6 +62,7 @@ enum cvmx_mips_space {
 #include <asm/octeon/cvmx-iob-defs.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-l2c-defs.h>
+#include <asm/octeon/cvmx-l2d-defs.h>
 #include <asm/octeon/cvmx-l2t-defs.h>
 #include <asm/octeon/cvmx-led-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>

From bed58464303fabdbf126544cbdd95a9e1c2e9949 Mon Sep 17 00:00:00 2001
From: "Steven J. Hill" <steven.hill@cavium.com>
Date: Wed, 2 Aug 2017 12:39:50 -0500
Subject: [PATCH 166/281] MIPS: OCTEON: Fix USB platform code breakage.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix build error when CONFIG_SMP is turned off:

CC [M]  arch/mips/cavium-octeon/octeon-usb.o
arch/mips/cavium-octeon/octeon-usb.c: In function
‘dwc3_octeon_device_init’:
arch/mips/cavium-octeon/octeon-usb.c:540:4: error: implicit declaration
of function ‘devm_iounmap’ [-Werror=implicit-function-declaration]
     devm_iounmap(&pdev->dev, base);

Signed-off-by: Steven J. Hill <steven.hill@cavium.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Tested-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16907/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/octeon-usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c
index 542be1cd0f32..bfdfaf32d2c4 100644
--- a/arch/mips/cavium-octeon/octeon-usb.c
+++ b/arch/mips/cavium-octeon/octeon-usb.c
@@ -13,9 +13,9 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/of_platform.h>
+#include <linux/io.h>
 
 #include <asm/octeon/octeon.h>
-#include <asm/octeon/cvmx-gpio-defs.h>
 
 /* USB Control Register */
 union cvm_usbdrd_uctl_ctl {

From ae5b0675942ab30cde96099c68a2290bd1aafcca Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 1 Aug 2017 13:32:57 -0700
Subject: [PATCH 167/281] Revert "MIPS: Don't unnecessarily include kmalloc.h
 into <asm/cache.h>."

Commit 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into
<asm/cache.h>.") claimed that the inclusion of the machine's kmalloc.h
from asm/cache.h is unnecessary, but this is not true.

Without including kmalloc.h we don't get a definition for
ARCH_DMA_MINALIGN, which means we no longer suitably align DMA. Further
to this the definition of ARCH_KMALLOC_MINALIGN provided by linux/slab.h
ends up being set to the alignment of an unsigned long long value rather
than to ARCH_DMA_MINALIGN, which means that buffers allocated using
kmalloc may no longer be safely aligned for use with DMA.

Fix this by re-adding the include of kmalloc.h in asm/cache.h. This
reverts commit 296e46db0073 ("MIPS: Don't unnecessarily include
kmalloc.h into <asm/cache.h>.")

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into <asm/cache.h>.")
Cc: linux-mips@linux-mips.org
Cc: stable <stable@vger.kernel.org> # v4.12+
Patchwork: https://patchwork.linux-mips.org/patch/16895/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cache.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index fc67947ed658..8b14c2706aa5 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -9,6 +9,8 @@
 #ifndef _ASM_CACHE_H
 #define _ASM_CACHE_H
 
+#include <kmalloc.h>
+
 #define L1_CACHE_SHIFT		CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 

From b5fa57ddc4a2492441a1391f07d5c8a282271249 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Thu, 3 Aug 2017 17:58:07 +0100
Subject: [PATCH 168/281] drm/i915/perf: fix flex eu registers programming

We were reserving fewer dwords in the ring than necessary. Indeed
we're always writing all registers once, so discard the actual number
of registers given by the user and just program the whitelisted ones
once.

Fixes: 19f81df2859e ("drm/i915/perf: Add OA unit support for Gen 8+")
Reported-by: Matthew Auld <matthew.william.auld@gmail.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v4.12+
Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-6-lionel.g.landwerlin@intel.com
(cherry picked from commit 01d928e9a1644eb2e28f684905f888e700c7b9dc)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9cd22f83b0cf..f33d90226704 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1601,11 +1601,11 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req)
 	u32 *cs;
 	int i;
 
-	cs = intel_ring_begin(req, n_flex_regs * 2 + 4);
+	cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1);
+	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
 
 	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
 	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |

From cd82f37a9ddaaafb33d8bc3f44857edbad5d52bf Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Aug 2017 11:41:35 +0100
Subject: [PATCH 169/281] drm/i915/shrinker: Wrap need_resched() inside
 preempt-disable

In order for us to successfully detect the end of a timeslice,
preemption must be disabled. Otherwise, inside the loop we may be
preempted many times without our noticing, and each time our timeslice
will be reset, invalidating need_resched()

Reported-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reported-by: Tomi Sarvela <tomi.p.sarvela@intel.com>
Fixes: 290271de34f6 ("drm/i915: Spin for struct_mutex inside shrinker")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
Link: https://patchwork.freedesktop.org/patch/msgid/20170804104135.26805-1-chris@chris-wilson.co.uk
Tested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit 6cb0c6ad9e07f2c7971c4e8e0d9b7ceba151a925)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 1032f98add11..77fb39808131 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -43,16 +43,21 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 		return true;
 
 	case MUTEX_TRYLOCK_FAILED:
+		*unlock = false;
+		preempt_disable();
 		do {
 			cpu_relax();
 			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-	case MUTEX_TRYLOCK_SUCCESS:
 				*unlock = true;
-				return true;
+				break;
 			}
 		} while (!need_resched());
+		preempt_enable();
+		return *unlock;
 
-		return false;
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
 	}
 
 	BUG();

From 1e2ba788787c86f527eca6ffd9adb97d691a810e Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 31 May 2017 11:33:55 +0300
Subject: [PATCH 170/281] drm/i915: fix backlight invert for non-zero minimum
 brightness

When we started following the backlight minimum brightness in
6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness")
we overlooked the brightness invert quirk. Even if we invert the
brightness, we need to take the min limit into account. We probably
missed this because the invert has only been required on gen4 for proper
operation.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101127
Fixes: 6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness")
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170531083355.7898-1-jani.nikula@intel.com
(cherry picked from commit e9d7486eac949f2a8d121657e536c8abdd4ea088)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 96c2cbd81869..593349be8b9d 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -469,7 +469,7 @@ static u32 intel_panel_compute_brightness(struct intel_connector *connector,
 
 	if (i915.invert_brightness > 0 ||
 	    dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) {
-		return panel->backlight.max - val;
+		return panel->backlight.max - val + panel->backlight.min;
 	}
 
 	return val;

From 44a12806d010944a5727f1dc99123121e3e2c8c6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 7 Aug 2017 21:25:01 +1000
Subject: [PATCH 171/281] Revert "powerpc/64: Avoid restore_math call if
 possible in syscall exit"

This reverts commit bc4f65e4cf9d6cc43e0e9ba0b8648cf9201cd55f.

As reported by Andreas, this commit is causing unrecoverable SLB misses in the
system call exit path:

  Unrecoverable exception 4100 at c00000000000a1ec
  Oops: Unrecoverable exception, sig: 6 [#1]
  SMP NR_CPUS=2 PowerMac
  ...
  CPU: 0 PID: 18626 Comm: rm Not tainted 4.13.0-rc3 #1
  task: c00000018335e080 task.stack: c000000139e50000
  NIP: c00000000000a1ec LR: c00000000000a118 CTR: 0000000000000000
  REGS: c000000139e53bb0 TRAP: 4100   Not tainted  (4.13.0-rc3)
  MSR: 9000000000001030 <SF,HV,ME,IR,DR> CR: 24000044  XER: 20000000 SOFTE: 1
  GPR00: 0000000000000000 c000000139e53e30 c000000000abb500 fffffffffffffffe
  GPR04: c0000001eb866298 0000000000000000 0000000000000000 c00000018335e080
  GPR08: 900000000000d032 0000000000000000 0000000000000002 fffffffffffff001
  GPR12: c000000139e50000 c00000000ffff000 00003fffa8c0dca0 00003fffa8c0dc88
  GPR16: 0000000010000000 0000000000000001 00003fffa8c0eaa0 0000000000000000
  GPR20: 00003fffa8c27528 00003fffa8c27b00 0000000000000000 0000000000000000
  GPR24: 00003fffa8c0d918 00003ffff1b3efa0 00003fffa8c26d68 0000000000000000
  GPR28: 00003fffa8c249e8 00003fffa8c263d0 00003fffa8c27550 00003ffff1b3ef10
  NIP [c00000000000a1ec] system_call_exit+0xc0/0x21c
  LR [c00000000000a118] system_call+0x58/0x6c
  Call Trace:
  [c000000139e53e30] [c00000000000a118] system_call+0x58/0x6c (unreliable)
  Instruction dump:
  64a51000 7c6300d0 f8a101a0 4bffff9c 3c000000 60000006 780007c6 64000000
  60000000 7c004039 4082001c e8ed0170 <88070b78> 88c70b79 7c003214 2c200000

This is caused by us trying to load THREAD_LOAD_FP with MSR_RI=0, and taking an
SLB miss on the thread struct.

Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Diagnosed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 60 ++++++++++------------------------
 arch/powerpc/kernel/process.c  |  4 ---
 2 files changed, 18 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422767b4..e925c1c99c71 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,17 +223,27 @@ system_call_exit:
 	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
 	bne-	.Lsyscall_exit_work
 
-	/* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
-	li	r7,MSR_FP
+	andi.	r0,r8,MSR_FP
+	beq 2f
 #ifdef CONFIG_ALTIVEC
-	oris	r7,r7,MSR_VEC@h
+	andis.	r0,r8,MSR_VEC@h
+	bne	3f
 #endif
-	and	r0,r8,r7
-	cmpd	r0,r7
-	bne	.Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2:	addi    r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+	li	r10,MSR_RI
+	mtmsrd	r10,1		/* Restore RI */
+#endif
+	bl	restore_math
+#ifdef CONFIG_PPC_BOOK3S
+	li	r11,0
+	mtmsrd	r11,1
+#endif
+	ld	r8,_MSR(r1)
+	ld	r3,RESULT(r1)
+	li	r11,-MAX_ERRNO
 
-	cmpld	r3,r11
+3:	cmpld	r3,r11
 	ld	r5,_CCR(r1)
 	bge-	.Lsyscall_error
 .Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r5,_CCR(r1)
 	b	.Lsyscall_error_cont
 
-.Lsyscall_restore_math:
-	/*
-	 * Some initial tests from restore_math to avoid the heavyweight
-	 * C code entry and MSR manipulations.
-	 */
-	LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
-	and.	r0,r0,r8
-	bne	1f
-
-	ld	r7,PACACURRENT(r13)
-	lbz	r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
-	lbz	r6,THREAD+THREAD_LOAD_VEC(r7)
-	add	r0,r0,r6
-#endif
-	cmpdi	r0,0
-	beq	.Lsyscall_restore_math_cont
-
-1:	addi    r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
-	li	r10,MSR_RI
-	mtmsrd	r10,1		/* Restore RI */
-#endif
-	bl	restore_math
-#ifdef CONFIG_PPC_BOOK3S
-	li	r11,0
-	mtmsrd	r11,1
-#endif
-	/* Restore volatiles, reload MSR from updated one */
-	ld	r8,_MSR(r1)
-	ld	r3,RESULT(r1)
-	li	r11,-MAX_ERRNO
-	b	.Lsyscall_restore_math_cont
-
 /* Traced system call support */
 .Lsyscall_dotrace:
 	bl	save_nvgprs
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..ec480966f9bf 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs)
 {
 	unsigned long msr;
 
-	/*
-	 * Syscall exit makes a similar initial check before branching
-	 * to restore_math. Keep them in synch.
-	 */
 	if (!msr_tm_active(regs->msr) &&
 		!current->thread.load_fp && !loadvec(current->thread))
 		return;

From b399ee28c29c07f6a7ad87dade9148828757e6e9 Mon Sep 17 00:00:00 2001
From: Goran Ferenc <goran.ferenc@imgtec.com>
Date: Thu, 27 Jul 2017 18:08:47 +0200
Subject: [PATCH 172/281] MIPS: VDSO: Fix clobber lists in fallback code paths

Extend clobber lists to include all GP registers.

Fixes: 0b523a85e134 ("MIPS: VDSO: Add implementation of gettimeofday() fallback")

Signed-off-by: Miodrag Dinic <miodrag.dinic@imgtec.com>
Signed-off-by: Goran Ferenc <goran.ferenc@imgtec.com>
Signed-off-by: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Cc: Bo Hu <bohu@google.com>
Cc: Douglas Leung <douglas.leung@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Jin Qian <jinqian@google.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Petar Jovanovic <petar.jovanovic@imgtec.com>
Cc: Raghu Gandham <raghu.gandham@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16879/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/vdso/gettimeofday.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index 974276e828b2..e2690d7ca4dd 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -35,7 +35,8 @@ static __always_inline long gettimeofday_fallback(struct timeval *_tv,
 	"       syscall\n"
 	: "=r" (ret), "=r" (error)
 	: "r" (tv), "r" (tz), "r" (nr)
-	: "memory");
+	: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+	  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
 
 	return error ? -ret : ret;
 }
@@ -55,7 +56,8 @@ static __always_inline long clock_gettime_fallback(clockid_t _clkid,
 	"       syscall\n"
 	: "=r" (ret), "=r" (error)
 	: "r" (clkid), "r" (ts), "r" (nr)
-	: "memory");
+	: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+	  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
 
 	return error ? -ret : ret;
 }

From 9ac6e7ccc11e795a6e3eecc1f59346a99e51cd07 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Tue, 1 Aug 2017 17:57:19 +0200
Subject: [PATCH 173/281] pinctrl: armada-37xx: Fix the pin 23 on south bridge

Pin 23 on South bridge does not belong to the rgmii group. It belongs to
a separate group which can have 3 functions.

Due to this the fix also have to update the way the functions are
managed. Until now each groups used NB_FUNCS(which was 2) functions. For
the mpp23, 3 functions are available but it is the only group which needs
it, so on the loop involving NB_FUNCS an extra test was added to handle
only the functions added.

The bug was visible with the merge of the commit 07d065abf93d "arm64:
dts: marvell: armada-3720-db: Add vqmmc regulator for SD slot", the gpio
regulator used the gpio 23, due to this the whole rgmii group was setup
to gpio which broke the Ethernet support on the Armada 3720 DB
board. Thanks to this patch, the UHS SD cards (which need the vqmmc)
_and_ the Ethernet work again.

Cc: stable@vger.kernel.org
Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support
for Armada 37xx")
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 23 +++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index f024e25787fc..c95c76ecc3f7 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -37,7 +37,7 @@
 #define IRQ_STATUS	0x10
 #define IRQ_WKUP	0x18
 
-#define NB_FUNCS 2
+#define NB_FUNCS 3
 #define GPIO_PER_REG	32
 
 /**
@@ -126,6 +126,16 @@ struct armada_37xx_pinctrl {
 		.funcs = {_func1, "gpio"}	\
 	}
 
+#define PIN_GRP_GPIO_3(_name, _start, _nr, _mask, _v1, _v2, _v3, _f1, _f2) \
+	{					\
+		.name = _name,			\
+		.start_pin = _start,		\
+		.npins = _nr,			\
+		.reg_mask = _mask,		\
+		.val = {_v1, _v2, _v3},	\
+		.funcs = {_f1, _f2, "gpio"}	\
+	}
+
 #define PIN_GRP_EXTRA(_name, _start, _nr, _mask, _v1, _v2, _start2, _nr2, \
 		      _f1, _f2)				\
 	{						\
@@ -171,12 +181,13 @@ static struct armada_37xx_pin_group armada_37xx_sb_groups[] = {
 	PIN_GRP_GPIO("usb32_drvvbus0", 0, 1, BIT(0), "drvbus"),
 	PIN_GRP_GPIO("usb2_drvvbus1", 1, 1, BIT(1), "drvbus"),
 	PIN_GRP_GPIO("sdio_sb", 24, 6, BIT(2), "sdio"),
-	PIN_GRP_EXTRA("rgmii", 6, 12, BIT(3), 0, BIT(3), 23, 1, "mii", "gpio"),
+	PIN_GRP_GPIO("rgmii", 6, 12, BIT(3), "mii"),
 	PIN_GRP_GPIO("pcie1", 3, 2, BIT(4), "pcie"),
 	PIN_GRP_GPIO("ptp", 20, 3, BIT(5), "ptp"),
 	PIN_GRP("ptp_clk", 21, 1, BIT(6), "ptp", "mii"),
 	PIN_GRP("ptp_trig", 22, 1, BIT(7), "ptp", "mii"),
-	PIN_GRP("mii_col", 23, 1, BIT(8), "mii", "mii_err"),
+	PIN_GRP_GPIO_3("mii_col", 23, 1, BIT(8) | BIT(14), 0, BIT(8), BIT(14),
+		       "mii", "mii_err"),
 };
 
 const struct armada_37xx_pin_data armada_37xx_pin_nb = {
@@ -208,7 +219,7 @@ static int armada_37xx_get_func_reg(struct armada_37xx_pin_group *grp,
 {
 	int f;
 
-	for (f = 0; f < NB_FUNCS; f++)
+	for (f = 0; (f < NB_FUNCS) && grp->funcs[f]; f++)
 		if (!strcmp(grp->funcs[f], func))
 			return f;
 
@@ -795,7 +806,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info)
 		for (j = 0; j < grp->extra_npins; j++)
 			grp->pins[i+j] = grp->extra_pin + j;
 
-		for (f = 0; f < NB_FUNCS; f++) {
+		for (f = 0; (f < NB_FUNCS) && grp->funcs[f]; f++) {
 			int ret;
 			/* check for unique functions and count groups */
 			ret = armada_37xx_add_function(info->funcs, &funcsize,
@@ -847,7 +858,7 @@ static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info)
 			struct armada_37xx_pin_group *gp = &info->groups[g];
 			int f;
 
-			for (f = 0; f < NB_FUNCS; f++) {
+			for (f = 0; (f < NB_FUNCS) && gp->funcs[f]; f++) {
 				if (strcmp(gp->funcs[f], name) == 0) {
 					*groups = gp->name;
 					groups++;

From 6b67c3906cd74d88da4f6717b4b294bd52fce56a Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Tue, 1 Aug 2017 17:57:20 +0200
Subject: [PATCH 174/281] pinctrl: armada-37xx: Fix number of pin in south
 bridge

On the south bridge we have pin from to 29, so it gives 30 pins (and not
29).

Without this patch the kernel complain with the following traces:
cat /sys/kernel/debug/pinctrl/d0018800.pinctrl/pingroups
[  154.530205] armada-37xx-pinctrl d0018800.pinctrl: failed to get pin(29) name
[  154.537567] ------------[ cut here ]------------
[  154.542348] WARNING: CPU: 1 PID: 1347 at /home/gclement/open/kernel/marvell-mainline-linux/drivers/pinctrl/core.c:1610 pinctrl_groups_show+0x15c/0x1a0
[  154.555918] Modules linked in:
[  154.558890] CPU: 1 PID: 1347 Comm: cat Tainted: G        W       4.13.0-rc1-00001-g19e1b9fa219d #525
[  154.568316] Hardware name: Marvell Armada 3720 Development Board DB-88F3720-DDR3 (DT)
[  154.576311] task: ffff80001d32d100 task.stack: ffff80001bdc0000
[  154.583048] PC is at pinctrl_groups_show+0x15c/0x1a0
[  154.587816] LR is at pinctrl_groups_show+0x148/0x1a0
[  154.592847] pc : [<ffff0000083e3adc>] lr : [<ffff0000083e3ac8>] pstate: 00000145
[  154.600840] sp : ffff80001bdc3c80
[  154.604255] x29: ffff80001bdc3c80 x28: 00000000f7750000
[  154.609825] x27: ffff80001d05d198 x26: 0000000000000009
[  154.615224] x25: ffff0000089ead20 x24: 0000000000000002
[  154.620705] x23: ffff000008c8e1d0 x22: ffff80001be55700
[  154.626187] x21: ffff80001d05d100 x20: 0000000000000005
[  154.631667] x19: 0000000000000006 x18: 0000000000000010
[  154.637238] x17: 0000000000000000 x16: ffff0000081fc4b8
[  154.642726] x15: 0000000000000006 x14: ffff0000899e537f
[  154.648214] x13: ffff0000099e538d x12: 206f742064656c69
[  154.653613] x11: 6166203a6c727463 x10: 0000000005f5e0ff
[  154.659094] x9 : ffff80001bdc38c0 x8 : 286e697020746567
[  154.664576] x7 : ffff000008551870 x6 : 000000000000011b
[  154.670146] x5 : 0000000000000000 x4 : 0000000000000000
[  154.675544] x3 : 0000000000000000 x2 : 0000000000000000
[  154.681025] x1 : ffff000008c8e1d0 x0 : ffff80001be55700
[  154.686507] Call trace:
[  154.688668] Exception stack(0xffff80001bdc3ab0 to 0xffff80001bdc3be0)
[  154.695224] 3aa0:                                   0000000000000006 0001000000000000
[  154.703310] 3ac0: ffff80001bdc3c80 ffff0000083e3adc ffff80001bdc3bb0 00000000ffffffd8
[  154.711304] 3ae0: 4554535953425553 6f6674616c703d4d 4349564544006d72 6674616c702b3d45
[  154.719478] 3b00: 313030643a6d726f 6e69702e30303838 ffff80006c727463 ffff0000089635d8
[  154.727562] 3b20: ffff80001d1ca0cb ffff000008af0fa4 ffff80001bdc3b40 ffff000008c8e1dc
[  154.735648] 3b40: ffff80001bdc3bc0 ffff000008223174 ffff80001be55700 ffff000008c8e1d0
[  154.743731] 3b60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[  154.752354] 3b80: 000000000000011b ffff000008551870 286e697020746567 ffff80001bdc38c0
[  154.760446] 3ba0: 0000000005f5e0ff 6166203a6c727463 206f742064656c69 ffff0000099e538d
[  154.767910] 3bc0: ffff0000899e537f 0000000000000006 ffff0000081fc4b8 0000000000000000
[  154.776085] [<ffff0000083e3adc>] pinctrl_groups_show+0x15c/0x1a0
[  154.782823] [<ffff000008222abc>] seq_read+0x184/0x460
[  154.787505] [<ffff000008344120>] full_proxy_read+0x60/0xa8
[  154.793431] [<ffff0000081f9bec>] __vfs_read+0x1c/0x110
[  154.799001] [<ffff0000081faff4>] vfs_read+0x84/0x140
[  154.803860] [<ffff0000081fc4fc>] SyS_read+0x44/0xa0
[  154.808983] [<ffff000008082f30>] el0_svc_naked+0x24/0x28
[  154.814459] ---[ end trace 4cbb00a92d616b95 ]---

Cc: stable@vger.kernel.org
Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support
for Armada 37xx")
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index c95c76ecc3f7..0c6d7812d6fd 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -198,7 +198,7 @@ const struct armada_37xx_pin_data armada_37xx_pin_nb = {
 };
 
 const struct armada_37xx_pin_data armada_37xx_pin_sb = {
-	.nr_pins = 29,
+	.nr_pins = 30,
 	.name = "GPIO2",
 	.groups = armada_37xx_sb_groups,
 	.ngroups = ARRAY_SIZE(armada_37xx_sb_groups),

From 5d996132d921c391af5f267123eca1a6a3148ecd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 4 Aug 2017 19:26:34 +0300
Subject: [PATCH 175/281] pinctrl: intel: merrifield: Correct UART pin lists

UART pin lists consist GPIO numbers which is simply wrong.
Replace it by pin numbers.

Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-merrifield.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index 4d4ef42a39b5..86c4b3fab7b0 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -343,9 +343,9 @@ static const struct pinctrl_pin_desc mrfld_pins[] = {
 
 static const unsigned int mrfld_sdio_pins[] = { 50, 51, 52, 53, 54, 55, 56 };
 static const unsigned int mrfld_spi5_pins[] = { 90, 91, 92, 93, 94, 95, 96 };
-static const unsigned int mrfld_uart0_pins[] = { 124, 125, 126, 127 };
-static const unsigned int mrfld_uart1_pins[] = { 128, 129, 130, 131 };
-static const unsigned int mrfld_uart2_pins[] = { 132, 133, 134, 135 };
+static const unsigned int mrfld_uart0_pins[] = { 115, 116, 117, 118 };
+static const unsigned int mrfld_uart1_pins[] = { 119, 120, 121, 122 };
+static const unsigned int mrfld_uart2_pins[] = { 123, 124, 125, 126 };
 static const unsigned int mrfld_pwm0_pins[] = { 144 };
 static const unsigned int mrfld_pwm1_pins[] = { 145 };
 static const unsigned int mrfld_pwm2_pins[] = { 132 };

From 0cca6c8920ade95e2741b2062cf1397dc546fb0f Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@o2linux.fr>
Date: Sun, 6 Aug 2017 16:00:05 +0200
Subject: [PATCH 176/281] pinctrl: generic: update references to
 Documentation/pinctrl.txt

Update deprecated references to Documentation/pinctrl.txt since it has been
moved to Documentation/driver-api/pinctl.rst.

Signed-off-by: Ludovic Desroches <ludovic.desroches@o2linux.fr>
Fixes: 5a9b73832e9e ("pinctrl.txt: move it to the driver-api book")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/gpio-legacy.txt      | 2 +-
 MAINTAINERS                             | 2 +-
 include/linux/device.h                  | 2 +-
 include/linux/pinctrl/pinconf-generic.h | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt
index b34fd94f7089..5eacc147ea87 100644
--- a/Documentation/gpio/gpio-legacy.txt
+++ b/Documentation/gpio/gpio-legacy.txt
@@ -459,7 +459,7 @@ pin controller?
 
 This is done by registering "ranges" of pins, which are essentially
 cross-reference tables. These are described in
-Documentation/pinctrl.txt
+Documentation/driver-api/pinctl.rst
 
 While the pin allocation is totally managed by the pinctrl subsystem,
 gpio (under gpiolib) is still maintained by gpio drivers. It may happen
diff --git a/MAINTAINERS b/MAINTAINERS
index f66488dfdbc9..2c85558aec19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10375,7 +10375,7 @@ L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/pinctrl/
-F:	Documentation/pinctrl.txt
+F:	Documentation/driver-api/pinctl.rst
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 723cd54b94da..beabdbc08420 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -843,7 +843,7 @@ struct dev_links_info {
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
  * @pins:	For device pin management.
- *		See Documentation/pinctrl.txt for details.
+ *		See Documentation/driver-api/pinctl.rst for details.
  * @msi_list:	Hosts MSI descriptors
  * @msi_domain: The generic MSI domain this device is using.
  * @numa_node:	NUMA node this device is close to.
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 231d3075815a..e91d1b6a260d 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -81,8 +81,8 @@
  * 	it.
  * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a
  * 	value on the line. Use argument 1 to indicate high level, argument 0 to
- * 	indicate low level. (Please see Documentation/pinctrl.txt, section
- * 	"GPIO mode pitfalls" for a discussion around this parameter.)
+ *	indicate low level. (Please see Documentation/driver-api/pinctl.rst,
+ *	section "GPIO mode pitfalls" for a discussion around this parameter.)
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
  *	the driver which alternative power source to use.

From 68fe55680d0f3342969f49412fceabb90bdfadba Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Sun, 30 Jul 2017 21:28:15 +0100
Subject: [PATCH 177/281] MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS
 regression

Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in
delay slots") regression and remove assembly errors:

arch/mips/dec/int-handler.S: Assembler messages:
arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat"
arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat"
arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat"
arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat"

triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU
instruction.  This is because with that option in place the instruction
becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU)
sequence that uses $at as a temporary register.

With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though,
and this is already enforced in arch/mips/Makefile, so choose the 32-bit
expansion variant for the supported configurations and then replace the
64-bit variant with #error just in case.

Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots")
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # 4.8+
Patchwork: https://patchwork.linux-mips.org/patch/16893/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/dec/int-handler.S | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 1910223a9c02..cea2bb1621e6 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -147,23 +147,12 @@
 		 * Find irq with highest priority
 		 */
 		# open coded PTR_LA t1, cpu_mask_nr_tbl
-#if (_MIPS_SZPTR == 32)
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
 		# open coded la t1, cpu_mask_nr_tbl
 		lui	t1, %hi(cpu_mask_nr_tbl)
 		addiu	t1, %lo(cpu_mask_nr_tbl)
-
-#endif
-#if (_MIPS_SZPTR == 64)
-		# open coded dla t1, cpu_mask_nr_tbl
-		.set	push
-		.set	noat
-		lui	t1, %highest(cpu_mask_nr_tbl)
-		lui	AT, %hi(cpu_mask_nr_tbl)
-		daddiu	t1, t1, %higher(cpu_mask_nr_tbl)
-		daddiu	AT, AT, %lo(cpu_mask_nr_tbl)
-		dsll	t1, 32
-		daddu	t1, t1, AT
-		.set	pop
+#else
+#error GCC `-msym32' option required for 64-bit DECstation builds
 #endif
 1:		lw	t2,(t1)
 		nop
@@ -214,23 +203,12 @@
 		 * Find irq with highest priority
 		 */
 		# open coded PTR_LA t1,asic_mask_nr_tbl
-#if (_MIPS_SZPTR == 32)
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
 		# open coded la t1, asic_mask_nr_tbl
 		lui	t1, %hi(asic_mask_nr_tbl)
 		addiu	t1, %lo(asic_mask_nr_tbl)
-
-#endif
-#if (_MIPS_SZPTR == 64)
-		# open coded dla t1, asic_mask_nr_tbl
-		.set	push
-		.set	noat
-		lui	t1, %highest(asic_mask_nr_tbl)
-		lui	AT, %hi(asic_mask_nr_tbl)
-		daddiu	t1, t1, %higher(asic_mask_nr_tbl)
-		daddiu	AT, AT, %lo(asic_mask_nr_tbl)
-		dsll	t1, 32
-		daddu	t1, t1, AT
-		.set	pop
+#else
+#error GCC `-msym32' option required for 64-bit DECstation builds
 #endif
 2:		lw	t2,(t1)
 		nop

From f9ea3225ddafa269cf1f6b495d132c26fde93903 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Mon, 7 Aug 2017 10:16:36 +0200
Subject: [PATCH 178/281] bpf: fix selftest/bpf/test_pkt_md_access on s390x

Commit 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads")
introduced new eBPF test cases. One of them (test_pkt_md_access.c)
fails on s390x. The BPF verifier error message is:

[root@s8360046 bpf]# ./test_progs
test_pkt_access:PASS:ipv4 349 nsec
test_pkt_access:PASS:ipv6 212 nsec
[....]
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
0: (71) r2 = *(u8 *)(r1 +0)
invalid bpf_context access off=0 size=1

libbpf: -- END LOG --
libbpf: failed to load program 'test1'
libbpf: failed to load object './test_pkt_md_access.o'
Summary: 29 PASSED, 1 FAILED
[root@s8360046 bpf]#

This is caused by a byte endianness issue. S390x is a big endian
architecture.  Pointer access to the lowest byte or halfword of a
four byte value need to add an offset.
On little endian architectures this offset is not needed.

Fix this and use the same approach as the originator used for other files
(for example test_verifier.c) in his original commit.

With this fix the test program test_progs succeeds on s390x:
[root@s8360046 bpf]# ./test_progs
test_pkt_access:PASS:ipv4 236 nsec
test_pkt_access:PASS:ipv6 217 nsec
test_xdp:PASS:ipv4 3624 nsec
test_xdp:PASS:ipv6 1722 nsec
test_l4lb:PASS:ipv4 926 nsec
test_l4lb:PASS:ipv6 1322 nsec
test_tcp_estats:PASS: 0 nsec
test_bpf_obj_id:PASS:get-fd-by-notexist-prog-id 0 nsec
test_bpf_obj_id:PASS:get-fd-by-notexist-map-id 0 nsec
test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:check total prog id found by get_next_id 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec
test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec
test_bpf_obj_id:PASS:check total map id found by get_next_id 0 nsec
test_pkt_md_access:PASS: 277 nsec
Summary: 30 PASSED, 0 FAILED
[root@s8360046 bpf]#

Fixes: 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads")
Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_pkt_md_access.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c
index 71729d47eb85..7956302ecdf2 100644
--- a/tools/testing/selftests/bpf/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/test_pkt_md_access.c
@@ -12,12 +12,23 @@
 
 int _version SEC("version") = 1;
 
+#if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define TEST_FIELD(TYPE, FIELD, MASK)					\
 	{								\
 		TYPE tmp = *(volatile TYPE *)&skb->FIELD;		\
 		if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK))	\
 			return TC_ACT_SHOT;				\
 	}
+#else
+#define TEST_FIELD_OFFSET(a, b)	((sizeof(a) - sizeof(b)) / sizeof(b))
+#define TEST_FIELD(TYPE, FIELD, MASK)					\
+	{								\
+		TYPE tmp = *((volatile TYPE *)&skb->FIELD +		\
+			      TEST_FIELD_OFFSET(skb->FIELD, TYPE));	\
+		if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK))	\
+			return TC_ACT_SHOT;				\
+	}
+#endif
 
 SEC("test1")
 int process(struct __sk_buff *skb)

From 22889dbbd98a0e3390e9120074c39c6e5a3fea5e Mon Sep 17 00:00:00 2001
From: Dean Jenkins <Dean_Jenkins@mentor.com>
Date: Mon, 7 Aug 2017 09:50:14 +0100
Subject: [PATCH 179/281] asix: Add rx->ax_skb = NULL after usbnet_skb_return()

In asix_rx_fixup_internal() there is a risk that rx->ax_skb gets
reused after passing the Ethernet frame into the network stack via
usbnet_skb_return().

The risks include:

a) asynchronously freeing rx->ax_skb after passing the netdev buffer
   to the NAPI layer which might corrupt the backlog queue.

b) erroneously reusing rx->ax_skb such as calling skb_put_data() multiple
   times which causes writing off the end of the netdev buffer.

Therefore add a defensive rx->ax_skb = NULL after usbnet_skb_return()
so that it is not possible to free rx->ax_skb or to apply
skb_put_data() too many times.

Signed-off-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 7847436c441e..6983b6bd8cf9 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -168,8 +168,10 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 		if (rx->ax_skb) {
 			skb_put_data(rx->ax_skb, skb->data + offset,
 				     copy_length);
-			if (!rx->remaining)
+			if (!rx->remaining) {
 				usbnet_skb_return(dev, rx->ax_skb);
+				rx->ax_skb = NULL;
+			}
 		}
 
 		offset += (copy_length + 1) & 0xfffe;

From 960eb4eeaa47a3a5061a4e47e28411e85840ab2c Mon Sep 17 00:00:00 2001
From: Dean Jenkins <Dean_Jenkins@mentor.com>
Date: Mon, 7 Aug 2017 09:50:15 +0100
Subject: [PATCH 180/281] asix: Ensure asix_rx_fixup_info members are all reset

There is a risk that the members of the structure asix_rx_fixup_info
become unsynchronised leading to the possibility of a malfunction.

For example, rx->split_head was not being set to false after an
error was detected so potentially could cause a malformed 32-bit
Data header word to be formed.

Therefore add function reset_asix_rx_fixup_info() to reset all the
members of asix_rx_fixup_info so that future processing will start
with known initial conditions.

Also, if (skb->len != offset) becomes true then call
reset_asix_rx_fixup_info() so that the processing of the next URB
starts with known initial conditions. Without the call, the check
does nothing which potentially could lead to a malfunction
when the next URB is processed.

In addition, for robustness, call reset_asix_rx_fixup_info() before
every error path's "return 0". This ensures that the next URB is
processed from known initial conditions.

Signed-off-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_common.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 6983b6bd8cf9..fda74f33e3ec 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -75,6 +75,27 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 			       value, index, data, size);
 }
 
+static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx)
+{
+	/* Reset the variables that have a lifetime outside of
+	 * asix_rx_fixup_internal() so that future processing starts from a
+	 * known set of initial conditions.
+	 */
+
+	if (rx->ax_skb) {
+		/* Discard any incomplete Ethernet frame in the netdev buffer */
+		kfree_skb(rx->ax_skb);
+		rx->ax_skb = NULL;
+	}
+
+	/* Assume the Data header 32-bit word is at the start of the current
+	 * or next URB socket buffer so reset all the state variables.
+	 */
+	rx->remaining = 0;
+	rx->split_head = false;
+	rx->header = 0;
+}
+
 int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 			   struct asix_rx_fixup_info *rx)
 {
@@ -99,15 +120,7 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 		if (size != ((~rx->header >> 16) & 0x7ff)) {
 			netdev_err(dev->net, "asix_rx_fixup() Data Header synchronisation was lost, remaining %d\n",
 				   rx->remaining);
-			if (rx->ax_skb) {
-				kfree_skb(rx->ax_skb);
-				rx->ax_skb = NULL;
-				/* Discard the incomplete netdev Ethernet frame
-				 * and assume the Data header is at the start of
-				 * the current URB socket buffer.
-				 */
-			}
-			rx->remaining = 0;
+			reset_asix_rx_fixup_info(rx);
 		}
 	}
 
@@ -139,11 +152,13 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 			if (size != ((~rx->header >> 16) & 0x7ff)) {
 				netdev_err(dev->net, "asix_rx_fixup() Bad Header Length 0x%x, offset %d\n",
 					   rx->header, offset);
+				reset_asix_rx_fixup_info(rx);
 				return 0;
 			}
 			if (size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) {
 				netdev_dbg(dev->net, "asix_rx_fixup() Bad RX Length %d\n",
 					   size);
+				reset_asix_rx_fixup_info(rx);
 				return 0;
 			}
 
@@ -180,6 +195,7 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 	if (skb->len != offset) {
 		netdev_err(dev->net, "asix_rx_fixup() Bad SKB Length %d, %d\n",
 			   skb->len, offset);
+		reset_asix_rx_fixup_info(rx);
 		return 0;
 	}
 

From d0c8f338ab41438bdf8472cb4209d4ab54d439d5 Mon Sep 17 00:00:00 2001
From: Dean Jenkins <Dean_Jenkins@mentor.com>
Date: Mon, 7 Aug 2017 09:50:16 +0100
Subject: [PATCH 181/281] asix: Fix small memory leak in ax88772_unbind()

When Ethernet frames span mulitple URBs, the netdev buffer memory
pointed to by the asix_rx_fixup_info structure remains allocated
during the time gap between the 2 executions of asix_rx_fixup_internal().

This means that if ax88772_unbind() is called within this time
gap to free the memory of the parent private data structure then
a memory leak of the part filled netdev buffer memory will occur.

Therefore, create a new function asix_rx_fixup_common_free() to
free the memory of the netdev buffer and add a call to
asix_rx_fixup_common_free() from inside ax88772_unbind().

Consequently when an unbind occurs part way through receiving
an Ethernet frame, the netdev buffer memory that is holding part
of the received Ethernet frame will now be freed.

Signed-off-by: Dean Jenkins <Dean_Jenkins@mentor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix.h         |  1 +
 drivers/net/usb/asix_common.c  | 15 +++++++++++++++
 drivers/net/usb/asix_devices.c |  1 +
 3 files changed, 17 insertions(+)

diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index d1092421aaa7..9a4171b90947 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -209,6 +209,7 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value,
 int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb,
 			   struct asix_rx_fixup_info *rx);
 int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb);
+void asix_rx_fixup_common_free(struct asix_common_private *dp);
 
 struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 			      gfp_t flags);
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index fda74f33e3ec..522d2900cd1d 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -210,6 +210,21 @@ int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb)
 	return asix_rx_fixup_internal(dev, skb, rx);
 }
 
+void asix_rx_fixup_common_free(struct asix_common_private *dp)
+{
+	struct asix_rx_fixup_info *rx;
+
+	if (!dp)
+		return;
+
+	rx = &dp->rx_fixup_info;
+
+	if (rx->ax_skb) {
+		kfree_skb(rx->ax_skb);
+		rx->ax_skb = NULL;
+	}
+}
+
 struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 			      gfp_t flags)
 {
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index a3aa0a27dfe5..b2ff88e69a81 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -764,6 +764,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 
 static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
+	asix_rx_fixup_common_free(dev->driver_priv);
 	kfree(dev->driver_priv);
 }
 

From ec2c6726322f0d270bab477e4904bf9496f70ee5 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Mon, 7 Aug 2017 13:28:39 +0200
Subject: [PATCH 182/281] s390/qeth: fix L3 next-hop in xmit qeth hdr

On L3, the qeth_hdr struct needs to be filled with the next-hop
IP address.
The current code accesses rtable->rt_gateway without checking that
rtable is a valid address. The accidental access to a lowcore area
results in a random next-hop address in the qeth_hdr.
rtable (or more precisely, skb_dst(skb)) can be NULL in rare cases
(for instance together with AF_PACKET sockets).
This patch adds the missing NULL-ptr checks.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Fixes: 87e7597b5a3 qeth: Move away from using neighbour entries in qeth_l3_fill_header()
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 8975cd321390..d42e758518ed 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2512,7 +2512,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		struct rtable *rt = (struct rtable *) dst;
 		__be32 *pkey = &ip_hdr(skb)->daddr;
 
-		if (rt->rt_gateway)
+		if (rt && rt->rt_gateway)
 			pkey = &rt->rt_gateway;
 
 		/* IPv4 */
@@ -2523,7 +2523,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 		struct rt6_info *rt = (struct rt6_info *) dst;
 		struct in6_addr *pkey = &ipv6_hdr(skb)->daddr;
 
-		if (!ipv6_addr_any(&rt->rt6i_gateway))
+		if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
 			pkey = &rt->rt6i_gateway;
 
 		/* IPv6 */

From b925ef37b0a152b0c06aa43bc9204d0116f676d7 Mon Sep 17 00:00:00 2001
From: Anton Volkov <avolkov@ispras.ru>
Date: Mon, 7 Aug 2017 15:54:14 +0300
Subject: [PATCH 183/281] hysdn: fix to a race condition in put_log_buffer

The synchronization type that was used earlier to guard the loop that
deletes unused log buffers may lead to a situation that prevents any
thread from going through the loop.

The patch deletes previously used synchronization mechanism and moves
the loop under the spin_lock so the similar cases won't be feasible in
the future.

Found by by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Anton Volkov <avolkov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hysdn/hysdn_proclog.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c
index 7b5fd8fb1761..aaca0b3d662e 100644
--- a/drivers/isdn/hysdn/hysdn_proclog.c
+++ b/drivers/isdn/hysdn/hysdn_proclog.c
@@ -44,7 +44,6 @@ struct procdata {
 	char log_name[15];	/* log filename */
 	struct log_data *log_head, *log_tail;	/* head and tail for queue */
 	int if_used;		/* open count for interface */
-	int volatile del_lock;	/* lock for delete operations */
 	unsigned char logtmp[LOG_MAX_LINELEN];
 	wait_queue_head_t rd_queue;
 };
@@ -102,7 +101,6 @@ put_log_buffer(hysdn_card *card, char *cp)
 {
 	struct log_data *ib;
 	struct procdata *pd = card->proclog;
-	int i;
 	unsigned long flags;
 
 	if (!pd)
@@ -126,21 +124,21 @@ put_log_buffer(hysdn_card *card, char *cp)
 	else
 		pd->log_tail->next = ib;	/* follows existing messages */
 	pd->log_tail = ib;	/* new tail */
-	i = pd->del_lock++;	/* get lock state */
-	spin_unlock_irqrestore(&card->hysdn_lock, flags);
 
 	/* delete old entrys */
-	if (!i)
-		while (pd->log_head->next) {
-			if ((pd->log_head->usage_cnt <= 0) &&
-			    (pd->log_head->next->usage_cnt <= 0)) {
-				ib = pd->log_head;
-				pd->log_head = pd->log_head->next;
-				kfree(ib);
-			} else
-				break;
-		}		/* pd->log_head->next */
-	pd->del_lock--;		/* release lock level */
+	while (pd->log_head->next) {
+		if ((pd->log_head->usage_cnt <= 0) &&
+		    (pd->log_head->next->usage_cnt <= 0)) {
+			ib = pd->log_head;
+			pd->log_head = pd->log_head->next;
+			kfree(ib);
+		} else {
+			break;
+		}
+	}		/* pd->log_head->next */
+
+	spin_unlock_irqrestore(&card->hysdn_lock, flags);
+
 	wake_up_interruptible(&(pd->rd_queue));		/* announce new entry */
 }				/* put_log_buffer */
 

From eb2a6b800c2d1336cce6709d48c42753a611c07b Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Mon, 7 Aug 2017 00:00:17 +0200
Subject: [PATCH 184/281] qed: Fix a memory allocation failure test in
 'qed_mcp_cmd_init()'

We allocate 'p_info->mfw_mb_cur' and 'p_info->mfw_mb_shadow' but we check
'p_info->mfw_mb_addr' instead of 'p_info->mfw_mb_cur'.

'p_info->mfw_mb_addr' is never 0, because it is initiliazed a few lines
above in 'qed_load_mcp_offsets()'.

Update the test and check the result of the 2 'kzalloc()' instead.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 9da91045d167..3eb241657368 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -253,7 +253,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
 	p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
 	p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
-	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
+	if (!p_info->mfw_mb_cur || !p_info->mfw_mb_shadow)
 		goto err;
 
 	return 0;

From 21da5332327b6d183bd93336ecf29c70bc609b7b Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Wed, 26 Jul 2017 08:41:08 +0100
Subject: [PATCH 185/281] MIPS: Introduce cpu_tcache_line_size

There exist macros to return the cache line size of the L1 dcache and L2
scache but there is currently no macro for the L3 tcache. Add this macro
which will be used by the following patch "MIPS: PCI: Fix
smp_processor_id() in preemptible"

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: Maciej W. Rozycki <macro@imgtec.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/16871/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu-features.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 8baa9033b181..721b698bfe3c 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -428,6 +428,9 @@
 #ifndef cpu_scache_line_size
 #define cpu_scache_line_size()	cpu_data[0].scache.linesz
 #endif
+#ifndef cpu_tcache_line_size
+#define cpu_tcache_line_size()	cpu_data[0].tcache.linesz
+#endif
 
 #ifndef cpu_hwrena_impl_bits
 #define cpu_hwrena_impl_bits		0

From 735302665c353d6756e7fa2a2cf41b039299f732 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Wed, 26 Jul 2017 08:41:09 +0100
Subject: [PATCH 186/281] MIPS: PCI: Fix smp_processor_id() in preemptible

Commit 1c3c5eab1715 ("sched/core: Enable might_sleep() and
smp_processor_id() checks early") enables checks for might_sleep() and
smp_processor_id() being used in preemptible code earlier in the boot
than before. This results in a new BUG from
pcibios_set_cache_line_size().

BUG: using smp_processor_id() in preemptible [00000000] code:
swapper/0/1 caller is pcibios_set_cache_line_size+0x10/0x70
CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc1-00007-g3ce3e4ba4275 #615
Stack: 0000000000000000 ffffffff81189694 0000000000000000 ffffffff81822318
       000000000000004e 0000000000000001 800000000e20bd08 20c49ba5e3540000
       0000000000000000 0000000000000000 ffffffff818d0000 0000000000000000
       0000000000000000 ffffffff81189328 ffffffff818ce692 0000000000000000
       0000000000000000 ffffffff81189bc8 ffffffff818d0000 0000000000000000
       ffffffff81828907 ffffffff81769970 800000020ec78d80 ffffffff818c7b48
       0000000000000001 0000000000000001 ffffffff818652b0 ffffffff81896268
       ffffffff818c0000 800000020ec7fb40 800000020ec7fc58 ffffffff81684cac
       0000000000000000 ffffffff8118ab50 0000000000000030 ffffffff81769970
       0000000000000001 ffffffff81122a58 0000000000000000 0000000000000000 ...
Call Trace:
[<ffffffff81122a58>] show_stack+0x90/0xb0
[<ffffffff81684cac>] dump_stack+0xac/0xf0
[<ffffffff813f7050>] check_preemption_disabled+0x120/0x128
[<ffffffff818855e8>] pcibios_set_cache_line_size+0x10/0x70
[<ffffffff81100578>] do_one_initcall+0x48/0x140
[<ffffffff81865dc4>] kernel_init_freeable+0x194/0x24c
[<ffffffff8169c534>] kernel_init+0x14/0x118
[<ffffffff8111ca84>] ret_from_kernel_thread+0x14/0x1c

Fix this by using the cpu_*cache_line_size() macros instead. These
macros are the "proper" way to determine the CPU cache sizes.
This makes use of the newly added cpu_tcache_line_size.

Fixes: 1c3c5eab1715 ("sched/core: Enable might_sleep() and smp_processor_id() checks early")
Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Suggested-by: James Hogan <james.hogan@imgtec.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/pci.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index bd67ac74fe2d..9632436d74d7 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -28,16 +28,15 @@ EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
 
 static int __init pcibios_set_cache_line_size(void)
 {
-	struct cpuinfo_mips *c = &current_cpu_data;
 	unsigned int lsize;
 
 	/*
 	 * Set PCI cacheline size to that of the highest level in the
 	 * cache hierarchy.
 	 */
-	lsize = c->dcache.linesz;
-	lsize = c->scache.linesz ? : lsize;
-	lsize = c->tcache.linesz ? : lsize;
+	lsize = cpu_dcache_line_size();
+	lsize = cpu_scache_line_size() ? : lsize;
+	lsize = cpu_tcache_line_size() ? : lsize;
 
 	BUG_ON(!lsize);
 

From ac2b21157a3104ad2daa21c65e6cc73604edba0b Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Tue, 8 Aug 2017 10:48:15 +0530
Subject: [PATCH 187/281] mmc: host: omap_hsmmc: Add CMD23 capability to
 omap_hsmmc driver

omap_hsmmc driver always relied on CMD12 to stop transmission.
However if CMD12 is not issued at the correct timing, the card will
indicate a out of range error. With certain cards in some of the
DRA7 based boards, -EIO error is observed. By Adding CMD23 capability,
the MMC core will send MMC_SET_BLOCK_COUNT command before
MMC_READ_MULTIPLE_BLOCK/MMC_WRITE_MULTIPLE_BLOCK commands.

commit a04e6bae9e6f12 ("mmc: core: check also R1 response for
stop commands") exposed this bug in omap_hsmmc driver.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap_hsmmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 04ff3c97a535..2ab4788d021f 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2086,7 +2086,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 	mmc->max_seg_size = mmc->max_req_size;
 
 	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
-		     MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE;
+		     MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23;
 
 	mmc->caps |= mmc_pdata(host)->caps;
 	if (mmc->caps & MMC_CAP_8_BIT_DATA)

From 785a12afdb4a52903447fd890633c82fdda4b6f7 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Tue, 8 Aug 2017 14:13:15 +0530
Subject: [PATCH 188/281] powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT
 states when stop-api fails

Currently, we use the opal call opal_slw_set_reg() to inform the
Sleep-Winkle Engine (SLW) to restore the contents of some of the
Hypervisor state on wakeup from deep idle states that lose full
hypervisor context (characterized by the flag
OPAL_PM_LOSE_FULL_CONTEXT).

However, the current code has a bug in that if opal_slw_set_reg()
fails, we don't disable the use of these deep states (winkle on
POWER8, stop4 onwards on POWER9).

This patch fixes this bug by ensuring that if programing the
sleep-winkle engine to restore the hypervisor states in
pnv_save_sprs_for_deep_states() fails, then we exclude such states by
clearing the OPAL_PM_LOSE_FULL_CONTEXT flag from
supported_cpuidle_states. As a result POWER8 will be prevented from
using winkle for CPU-Hotplug, and POWER9 will put the offlined CPUs to
the default stop state when available.

Further, we ensure in the initialization of the cpuidle-powernv driver
to only include those states whose flags are present in
supported_cpuidle_states, thereby skipping OPAL_PM_LOSE_FULL_CONTEXT
states when they have been disabled due to stop-api failure.

Fixes: 1e1601b38e6 ("powerpc/powernv/idle: Restore SPRs for deep idle
states via stop API.")

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/idle.c | 41 +++++++++++++++++++++++++--
 drivers/cpuidle/cpuidle-powernv.c     | 10 +++++++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2abee070373f..a553aeea7af6 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
  */
 static u64 pnv_deepest_stop_psscr_val;
 static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
 static bool deepest_stop_found;
 
 static int pnv_save_sprs_for_deep_states(void)
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
 
 	update_subcore_sibling_mask();
 
-	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
-		pnv_save_sprs_for_deep_states();
+	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+		int rc = pnv_save_sprs_for_deep_states();
+
+		if (likely(!rc))
+			return;
+
+		/*
+		 * The stop-api is unable to restore hypervisor
+		 * resources on wakeup from platform idle states which
+		 * lose full context. So disable such states.
+		 */
+		supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+		pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+		pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+		if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+		    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+			/*
+			 * Use the default stop state for CPU-Hotplug
+			 * if available.
+			 */
+			if (default_stop_found) {
+				pnv_deepest_stop_psscr_val =
+					pnv_default_stop_val;
+				pnv_deepest_stop_psscr_mask =
+					pnv_default_stop_mask;
+				pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+					pnv_deepest_stop_psscr_val);
+			} else { /* Fallback to snooze loop for CPU-Hotplug */
+				deepest_stop_found = false;
+				pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+			}
+		}
+	}
 }
 
 u32 pnv_get_supported_cpuidle_states(void)
@@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
 						pnv_deepest_stop_psscr_val;
 		srr1 = power9_idle_stop(psscr);
 
-	} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+	} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
+		   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
 		srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
 	} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
 		   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
 			max_residency_ns = residency_ns[i];
 			pnv_deepest_stop_psscr_val = psscr_val[i];
 			pnv_deepest_stop_psscr_mask = psscr_mask[i];
+			pnv_deepest_stop_flag = flags[i];
 			deepest_stop_found = true;
 		}
 
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 37b0698b7193..42896a67aeae 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
 	return -1;
 }
 
+extern u32 pnv_get_supported_cpuidle_states(void);
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
@@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
 	const char *names[CPUIDLE_STATE_MAX];
 	u32 has_stop_states = 0;
 	int i, rc;
+	u32 supported_flags = pnv_get_supported_cpuidle_states();
+
 
 	/* Currently we have snooze statically defined */
 
@@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
 	for (i = 0; i < dt_idle_states; i++) {
 		unsigned int exit_latency, target_residency;
 		bool stops_timebase = false;
+
+		/*
+		 * Skip the platform idle state whose flag isn't in
+		 * the supported_cpuidle_states flag mask.
+		 */
+		if ((flags[i] & supported_flags) != flags[i])
+			continue;
 		/*
 		 * If an idle state has exit latency beyond
 		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it

From 527f10285bc3f700407bf3aab0ea7b3b9f9338da Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Mon, 7 Aug 2017 16:18:04 -0700
Subject: [PATCH 189/281] MIPS: Prevent building MT support for microMIPS
 kernels

We don't currently support the MT ASE for microMIPS kernels, and there
are no CPUs currently in existence that use both. They can however both
be enabled in Kconfig, resulting in build failures such as:

  AS      arch/mips/kernel/cps-vec.o
arch/mips/kernel/cps-vec.S: Assembler messages:
arch/mips/kernel/cps-vec.S:242: Warning: the 32-bit microMIPS architecture does not support the `mt' extension
arch/mips/kernel/cps-vec.S:276: Error: unrecognized opcode `mttc0 $13,$2,2'
arch/mips/kernel/cps-vec.S:282: Error: unrecognized opcode `mttc0 $8,$1,2'
arch/mips/kernel/cps-vec.S:285: Error: unrecognized opcode `mttc0 $0,$2,1'
...

Fix this by preventing MT from being enabled when targeting microMIPS.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16951/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8dd20358464f..48d91d5be4e9 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2260,7 +2260,7 @@ config CPU_R4K_CACHE_TLB
 
 config MIPS_MT_SMP
 	bool "MIPS MT SMP support (1 TC on each available VPE)"
-	depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6
+	depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 && !CPU_MICROMIPS
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select SYNC_R4K

From 5fc9484f5e41b239d1e7a123219e53f333e43ba5 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Mon, 7 Aug 2017 16:16:47 -0700
Subject: [PATCH 190/281] MIPS: Set ISA bit in entry-y for microMIPS kernels

When building a kernel for the microMIPS ISA, ensure that the ISA bit
(ie. bit 0) in the entry address is set. Otherwise we may include an
entry address in images which bootloaders will jump to as MIPS32 code.

I originally tried using "objdump -f" to obtain the entry address, which
works for microMIPS but it always outputs a 32 bit address for a 32 bit
ELF whilst nm will sign extend to 64 bit. That matters for systems where
we might want to run a MIPS32 kernel on a MIPS64 CPU & load it with a
MIPS64 bootloader, which would then jump to a non-canonical
(non-sign-extended) address.

This works in all cases as it only changes the behaviour for microMIPS
kernels, but isn't the prettiest solution. A possible alternative would
be to write a custom tool to just extract, sign extend & print the entry
point of an ELF executable. I'm open to feedback if that would be
preferred.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16950/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Makefile | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 04343625b929..bc2708c9ada4 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -243,8 +243,21 @@ include arch/mips/Kbuild.platforms
 ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
 endif
-entry-y				= 0x$(shell $(NM) vmlinux 2>/dev/null \
+
+entry-noisa-y				= 0x$(shell $(NM) vmlinux 2>/dev/null \
 					| grep "\bkernel_entry\b" | cut -f1 -d \ )
+ifdef CONFIG_CPU_MICROMIPS
+  #
+  # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
+  # clear which would lead to images containing addresses which bootloaders may
+  # jump to as MIPS32 code.
+  #
+  entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
+              $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
+              $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
+else
+  entry-y = $(entry-noisa-y)
+endif
 
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/

From d6f756e09f01ea7a0efbbcef269a1e384a35d824 Mon Sep 17 00:00:00 2001
From: "Wladimir J. van der Laan" <laanwj@gmail.com>
Date: Tue, 25 Jul 2017 14:33:36 +0200
Subject: [PATCH 191/281] drm/etnaviv: Fix off-by-one error in reloc checking

A relocation pointing to the last four bytes of a buffer can
legitimately happen in the case of small vertex buffers.

CC: stable@vger.kernel.org #4.9+
Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 5bd93169dac2..6463fc2c736f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -270,8 +270,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 		if (ret)
 			return ret;
 
-		if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) {
-			DRM_ERROR("relocation %u outside object", i);
+		if (r->reloc_offset > bo->obj->base.size - sizeof(*ptr)) {
+			DRM_ERROR("relocation %u outside object\n", i);
 			return -EINVAL;
 		}
 

From 51d96dc2e2dc2cf9b81cf976cc93c51ba3ac2f92 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 8 Aug 2017 18:28:41 +0200
Subject: [PATCH 192/281] random: fix warning message on ia64 and parisc

Fix the warning message on the parisc and IA64 architectures to show the
correct function name of the caller by using %pS instead of %pF. The
message is printed with the value of _RET_IP_ which calls
__builtin_return_address(0) and as such returns the IP address caller
instead of pointer to a function descriptor of the caller.

The effect of this patch is visible on the parisc and ia64 architectures
only since those are the ones which use function descriptors while on
all others %pS and %pF will behave the same.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Helge Deller <deller@gmx.de>
Fixes: eecabf567422 ("random: suppress spammy warnings about unseeded randomness")
Fixes: d06bfd1989fe ("random: warn when kernel uses unseeded randomness")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/random.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index afa3ce7d3e72..8ad92707e45f 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1492,7 +1492,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
 #ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM
 	print_once = true;
 #endif
-	pr_notice("random: %s called from %pF with crng_init=%d\n",
+	pr_notice("random: %s called from %pS with crng_init=%d\n",
 		  func_name, caller, crng_init);
 }
 

From 92ddd95919466de5d34f3cb43635da9a7f9ab814 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Tue, 8 Aug 2017 18:54:01 +0800
Subject: [PATCH 193/281] mmc: mmc: correct the logic for setting HS400ES
 signal voltage

Change the default err value to -EINVAL, make sure the card only
has type EXT_CSD_CARD_TYPE_HS400_1_8V also do the signal voltage
setting when select hs400es mode.

Fixes: commit 1720d3545b77 ("mmc: core: switch to 1V8 or 1V2 for hs400es mode")
Cc: <stable@vger.kernel.org>
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4ffea14b7eb6..2bae69e39544 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1289,7 +1289,7 @@ out_err:
 static int mmc_select_hs400es(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	int err = 0;
+	int err = -EINVAL;
 	u8 val;
 
 	if (!(host->caps & MMC_CAP_8_BIT_DATA)) {

From a1ffc2d25ae98878f37445d223b3344686b5c822 Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Tue, 25 Jul 2017 14:53:42 +0200
Subject: [PATCH 194/281] MAINTAINERS: greybus: Fix typo s/LOOBACK/LOOPBACK

Fixes: f47e07bc5f1a5c48 ("Fix up MAINTAINERS file problems")
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 44cb004c765d..01609e368bc7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5834,7 +5834,7 @@ F:	drivers/staging/greybus/spi.c
 F:	drivers/staging/greybus/spilib.c
 F:	drivers/staging/greybus/spilib.h
 
-GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS
+GREYBUS LOOPBACK/TIME PROTOCOLS DRIVERS
 M:	Bryan O'Donoghue <pure.logic@nexus-software.ie>
 S:	Maintained
 F:	drivers/staging/greybus/loopback.c

From 6209ef67883ae6407cceb557b9ec4b2117d91697 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 2 Aug 2017 10:57:45 -0700
Subject: [PATCH 195/281] MAINTAINERS: openbmc mailing list is moderated

The openbmc mailing list is moderated for non-subscribers.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Joel Stanley <joel@jms.id.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 01609e368bc7..3c419022ed93 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1161,7 +1161,7 @@ M:	Brendan Higgins <brendanhiggins@google.com>
 R:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 R:	Joel Stanley <joel@jms.id.au>
 L:	linux-i2c@vger.kernel.org
-L:	openbmc@lists.ozlabs.org
+L:	openbmc@lists.ozlabs.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/irqchip/irq-aspeed-i2c-ic.c
 F:	drivers/i2c/busses/i2c-aspeed.c

From 6f7d98ec445b61f8f416fedfe607cb4f1653a8bc Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 4 Aug 2017 21:45:48 -0700
Subject: [PATCH 196/281] get_maintainer: Prepare for separate MAINTAINERS
 files

Allow for MAINTAINERS to become a directory and if it is,
read all the files in the directory for maintained sections.

Optionally look for all files named MAINTAINERS in directories
excluding the .git directory by using --find-maintainer-files.

This optional feature adds ~.3 seconds of CPU on an Intel
i5-6200 with an SSD.

Miscellanea:

 - Create a read_maintainer_file subroutine from the existing code
 - Test only the existence of MAINTAINERS, not whether it's a file

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/get_maintainer.pl | 87 ++++++++++++++++++++++++++++-----------
 1 file changed, 64 insertions(+), 23 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 3bd5f4f30235..bc443201d3ef 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -18,6 +18,7 @@ my $V = '0.26';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 use Cwd;
+use File::Find;
 
 my $cur_path = fastgetcwd() . '/';
 my $lk_path = "./";
@@ -58,6 +59,7 @@ my $from_filename = 0;
 my $pattern_depth = 0;
 my $version = 0;
 my $help = 0;
+my $find_maintainer_files = 0;
 
 my $vcs_used = 0;
 
@@ -249,6 +251,7 @@ if (!GetOptions(
 		'sections!' => \$sections,
 		'fe|file-emails!' => \$file_emails,
 		'f|file' => \$from_filename,
+		'find-maintainer-files' => \$find_maintainer_files,
 		'v|version' => \$version,
 		'h|help|usage' => \$help,
 		)) {
@@ -307,36 +310,74 @@ if (!top_of_kernel_tree($lk_path)) {
 
 my @typevalue = ();
 my %keyword_hash;
+my @mfiles = ();
 
-open (my $maint, '<', "${lk_path}MAINTAINERS")
-    or die "$P: Can't open MAINTAINERS: $!\n";
-while (<$maint>) {
-    my $line = $_;
+sub read_maintainer_file {
+    my ($file) = @_;
 
-    if ($line =~ m/^([A-Z]):\s*(.*)/) {
-	my $type = $1;
-	my $value = $2;
+    open (my $maint, '<', "$file")
+	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
+    while (<$maint>) {
+	my $line = $_;
 
-	##Filename pattern matching
-	if ($type eq "F" || $type eq "X") {
-	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
-	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
-	    $value =~ s/\?/\./g;         ##Convert ? to .
-	    ##if pattern is a directory and it lacks a trailing slash, add one
-	    if ((-d $value)) {
-		$value =~ s@([^/])$@$1/@;
+	if ($line =~ m/^([A-Z]):\s*(.*)/) {
+	    my $type = $1;
+	    my $value = $2;
+
+	    ##Filename pattern matching
+	    if ($type eq "F" || $type eq "X") {
+		$value =~ s@\.@\\\.@g;       ##Convert . to \.
+		$value =~ s/\*/\.\*/g;       ##Convert * to .*
+		$value =~ s/\?/\./g;         ##Convert ? to .
+		##if pattern is a directory and it lacks a trailing slash, add one
+		if ((-d $value)) {
+		    $value =~ s@([^/])$@$1/@;
+		}
+	    } elsif ($type eq "K") {
+		$keyword_hash{@typevalue} = $value;
 	    }
-	} elsif ($type eq "K") {
-	    $keyword_hash{@typevalue} = $value;
+	    push(@typevalue, "$type:$value");
+	} elsif (!(/^\s*$/ || /^\s*\#/)) {
+	    $line =~ s/\n$//g;
+	    push(@typevalue, $line);
 	}
-	push(@typevalue, "$type:$value");
-    } elsif (!/^(\s)*$/) {
-	$line =~ s/\n$//g;
-	push(@typevalue, $line);
+    }
+    close($maint);
+}
+
+sub find_is_maintainer_file {
+    my ($file) = $_;
+    return if ($file !~ m@/MAINTAINERS$@);
+    $file = $File::Find::name;
+    return if (! -f $file);
+    push(@mfiles, $file);
+}
+
+sub find_ignore_git {
+    return grep { $_ !~ /^\.git$/; } @_;
+}
+
+if (-d "${lk_path}MAINTAINERS") {
+    opendir(DIR, "${lk_path}MAINTAINERS") or die $!;
+    my @files = readdir(DIR);
+    closedir(DIR);
+    foreach my $file (@files) {
+	push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./);
     }
 }
-close($maint);
 
+if ($find_maintainer_files) {
+    find( { wanted => \&find_is_maintainer_file,
+	    preprocess => \&find_ignore_git,
+	    no_chdir => 1,
+	}, "${lk_path}");
+} else {
+    push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS";
+}
+
+foreach my $file (@mfiles) {
+    read_maintainer_file("$file");
+}
 
 #
 # Read mail address map
@@ -873,7 +914,7 @@ sub top_of_kernel_tree {
     if (   (-f "${lk_path}COPYING")
 	&& (-f "${lk_path}CREDITS")
 	&& (-f "${lk_path}Kbuild")
-	&& (-f "${lk_path}MAINTAINERS")
+	&& (-e "${lk_path}MAINTAINERS")
 	&& (-f "${lk_path}Makefile")
 	&& (-f "${lk_path}README")
 	&& (-d "${lk_path}Documentation")

From 61f741645a354d91fece7b9cbb2f3f3587db8b8a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 5 Aug 2017 18:45:47 -0700
Subject: [PATCH 197/281] parse-maintainers: Add section pattern sorting

Section [A-Z]: patterns are not currently in any required sorting order.
Add a specific sorting sequence to MAINTAINERS entries.
Sort F: and X: patterns in alphabetic order.

The preferred section ordering is:

  SECTION HEADER
  M:	Maintainers
  R:	Reviewers
  P:	Named persons without email addresses
  L:	Mailing list addresses
  S:	Status of this section (Supported, Maintained, Orphan, etc...)
  W:	Any relevant URLs
  T:	Source code control type (git, quilt, etc)
  Q:	Patchwork patch acceptance queue site
  B:	Bug tracking URIs
  C:	Chat URIs
  F:	Files with wildcard patterns (alphabetic ordered)
  X:	Excluded files with wildcard patterns (alphabetic ordered)
  N:	Files with regex patterns
  K:	Keyword regexes in source code for maintainership identification

Miscellaneous perl neatening:

 - Rename %map to %hash, map has a different meaning in perl
 - Avoid using \& and local variables for function indirection
 - Use return for a little c like clarity
 - Use c-like function call style instead of &function

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/parse-maintainers.pl | 70 +++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 21 deletions(-)

diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
index a0fe34349b24..5c8e0504c67e 100644
--- a/scripts/parse-maintainers.pl
+++ b/scripts/parse-maintainers.pl
@@ -2,9 +2,9 @@
 
 use strict;
 
-my %map;
+my %hash;
 
-# sort comparison function
+# sort comparison functions
 sub by_category($$) {
     my ($a, $b) = @_;
 
@@ -15,20 +15,47 @@ sub by_category($$) {
     $a =~ s/THE REST/ZZZZZZ/g;
     $b =~ s/THE REST/ZZZZZZ/g;
 
-    $a cmp $b;
+    return $a cmp $b;
+}
+
+sub by_pattern($$) {
+    my ($a, $b) = @_;
+    my $preferred_order = 'MRPLSWTQBCFXNK';
+
+    my $a1 = uc(substr($a, 0, 1));
+    my $b1 = uc(substr($b, 0, 1));
+
+    my $a_index = index($preferred_order, $a1);
+    my $b_index = index($preferred_order, $b1);
+
+    $a_index = 1000 if ($a_index == -1);
+    $b_index = 1000 if ($b_index == -1);
+
+    if (($a1 =~ /^F$/ && $b1 =~ /^F$/) ||
+	($a1 =~ /^X$/ && $b1 =~ /^X$/)) {
+	return $a cmp $b;
+    }
+
+    if ($a_index < $b_index) {
+	return -1;
+    } elsif ($a_index == $b_index) {
+	return 0;
+    } else {
+	return 1;
+    }
 }
 
 sub alpha_output {
-    my $key;
-    my $sort_method = \&by_category;
-    my $sep = "";
-
-    foreach $key (sort $sort_method keys %map) {
-        if ($key ne " ") {
-            print $sep . $key . "\n";
-            $sep = "\n";
-        }
-        print $map{$key};
+    foreach my $key (sort by_category keys %hash) {
+	if ($key eq " ") {
+	    chomp $hash{$key};
+	    print $hash{$key};
+	} else {
+	    print "\n" . $key . "\n";
+	    foreach my $pattern (sort by_pattern split('\n', $hash{$key})) {
+		print($pattern . "\n");
+	    }
+	}
     }
 }
 
@@ -42,7 +69,7 @@ sub trim {
 sub file_input {
     my $lastline = "";
     my $case = " ";
-    $map{$case} = "";
+    $hash{$case} = "";
 
     while (<>) {
         my $line = $_;
@@ -51,27 +78,28 @@ sub file_input {
         if ($line =~ m/^([A-Z]):\s*(.*)/) {
             $line = $1 . ":\t" . trim($2) . "\n";
             if ($lastline eq "") {
-                $map{$case} = $map{$case} . $line;
+                $hash{$case} = $hash{$case} . $line;
                 next;
             }
             $case = trim($lastline);
-            exists $map{$case} and die "Header '$case' already exists";
-            $map{$case} = $line;
+            exists $hash{$case} and die "Header '$case' already exists";
+            $hash{$case} = $line;
             $lastline = "";
             next;
         }
 
         if ($case eq " ") {
-            $map{$case} = $map{$case} . $lastline;
+            $hash{$case} = $hash{$case} . $lastline;
             $lastline = $line;
             next;
         }
         trim($lastline) eq "" or die ("Odd non-pattern line '$lastline' for '$case'");
         $lastline = $line;
     }
-    $map{$case} = $map{$case} . $lastline;
+    $hash{$case} = $hash{$case} . $lastline;
 }
 
-&file_input;
-&alpha_output;
+file_input();
+alpha_output();
+
 exit(0);

From fe9090301fed202a80a6113534efaa0d9184543b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 5 Aug 2017 18:45:48 -0700
Subject: [PATCH 198/281] parse-maintainers: Use perl hash references and
 specific filenames

Instead of reading STDIN and writing STDOUT, use specific filenames of
MAINTAINERS and MAINTAINERS.new.

Use hash references instead of global hash %hash so future modifications
can read and write specific hashes to split up MAINTAINERS into multiple
files using a script.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/parse-maintainers.pl | 59 +++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
index 5c8e0504c67e..c286154a2b68 100644
--- a/scripts/parse-maintainers.pl
+++ b/scripts/parse-maintainers.pl
@@ -2,7 +2,7 @@
 
 use strict;
 
-my %hash;
+my $P = $0;
 
 # sort comparison functions
 sub by_category($$) {
@@ -45,20 +45,6 @@ sub by_pattern($$) {
     }
 }
 
-sub alpha_output {
-    foreach my $key (sort by_category keys %hash) {
-	if ($key eq " ") {
-	    chomp $hash{$key};
-	    print $hash{$key};
-	} else {
-	    print "\n" . $key . "\n";
-	    foreach my $pattern (sort by_pattern split('\n', $hash{$key})) {
-		print($pattern . "\n");
-	    }
-	}
-    }
-}
-
 sub trim {
     my $s = shift;
     $s =~ s/\s+$//;
@@ -66,40 +52,65 @@ sub trim {
     return $s;
 }
 
+sub alpha_output {
+    my ($hashref, $filename) = (@_);
+
+    open(my $file, '>', "$filename") or die "$P: $filename: open failed - $!\n";
+    foreach my $key (sort by_category keys %$hashref) {
+	if ($key eq " ") {
+	    chomp $$hashref{$key};
+	    print $file $$hashref{$key};
+	} else {
+	    print $file "\n" . $key . "\n";
+	    foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) {
+		print $file ($pattern . "\n");
+	    }
+	}
+    }
+    close($file);
+}
+
 sub file_input {
+    my ($hashref, $filename) = (@_);
+
     my $lastline = "";
     my $case = " ";
-    $hash{$case} = "";
+    $$hashref{$case} = "";
 
-    while (<>) {
+    open(my $file, '<', "$filename") or die "$P: $filename: open failed - $!\n";
+
+    while (<$file>) {
         my $line = $_;
 
         # Pattern line?
         if ($line =~ m/^([A-Z]):\s*(.*)/) {
             $line = $1 . ":\t" . trim($2) . "\n";
             if ($lastline eq "") {
-                $hash{$case} = $hash{$case} . $line;
+                $$hashref{$case} = $$hashref{$case} . $line;
                 next;
             }
             $case = trim($lastline);
-            exists $hash{$case} and die "Header '$case' already exists";
-            $hash{$case} = $line;
+            exists $$hashref{$case} and die "Header '$case' already exists";
+            $$hashref{$case} = $line;
             $lastline = "";
             next;
         }
 
         if ($case eq " ") {
-            $hash{$case} = $hash{$case} . $lastline;
+            $$hashref{$case} = $$hashref{$case} . $lastline;
             $lastline = $line;
             next;
         }
         trim($lastline) eq "" or die ("Odd non-pattern line '$lastline' for '$case'");
         $lastline = $line;
     }
-    $hash{$case} = $hash{$case} . $lastline;
+    $$hashref{$case} = $$hashref{$case} . $lastline;
+    close($file);
 }
 
-file_input();
-alpha_output();
+my %hash;
+
+file_input(\%hash, "MAINTAINERS");
+alpha_output(\%hash, "MAINTAINERS.new");
 
 exit(0);

From b95c29a20f070babfea92ab8f741ec94695617d3 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 5 Aug 2017 18:45:49 -0700
Subject: [PATCH 199/281] parse-maintainers: Move matching sections from
 MAINTAINERS

Allow any number of command line arguments to match either the
section header or the section contents and create new files.

Create MAINTAINERS.new and SECTION.new.

This allows scripting of the movement of various sections from
MAINTAINERS.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/parse-maintainers.pl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl
index c286154a2b68..e40b53db7f9f 100644
--- a/scripts/parse-maintainers.pl
+++ b/scripts/parse-maintainers.pl
@@ -109,8 +109,20 @@ sub file_input {
 }
 
 my %hash;
+my %new_hash;
 
 file_input(\%hash, "MAINTAINERS");
+
+foreach my $type (@ARGV) {
+    foreach my $key (keys %hash) {
+	if ($key =~ /$type/ || $hash{$key} =~ /$type/) {
+	    $new_hash{$key} = $hash{$key};
+	    delete $hash{$key};
+	}
+    }
+}
+
 alpha_output(\%hash, "MAINTAINERS.new");
+alpha_output(\%new_hash, "SECTION.new");
 
 exit(0);

From 1feb26162bee7b2f110facfec71b5c7bdbc7d14d Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@monkey.org>
Date: Tue, 1 Aug 2017 16:25:01 -0400
Subject: [PATCH 200/281] nfs/flexfiles: fix leak of nfs4_ff_ds_version arrays

The client was freeing the nfs4_ff_layout_ds, but not the contained
nfs4_ff_ds_version array.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Cc: stable@vger.kernel.org # v4.0+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/flexfilelayout/flexfilelayoutdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 6df7a0cf5660..f32c58bbe556 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -32,6 +32,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
 {
 	nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
 	nfs4_pnfs_ds_put(mirror_ds->ds);
+	kfree(mirror_ds->ds_versions);
 	kfree_rcu(mirror_ds, id_node.rcu);
 }
 

From 1899bd57570a3e610db574b57d1e7e66378aa908 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 12 Jul 2017 12:09:22 +0200
Subject: [PATCH 201/281] drm/exynos: forbid creating framebuffers from too
 small GEM buffers

Add a check if the framebuffer described by the provided drm_mode_fb_cmd2
structure fits into provided GEM buffers. Without this check it is
possible to create a framebuffer object from a small buffer and set it to
the hardware, what results in displaying system memory outside the
allocated GEM buffer.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fb.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index d48fd7c918f8..73217c281c9a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -145,13 +145,19 @@ static struct drm_framebuffer *
 exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		      const struct drm_mode_fb_cmd2 *mode_cmd)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev, mode_cmd);
 	struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
 	struct drm_gem_object *obj;
 	struct drm_framebuffer *fb;
 	int i;
 	int ret;
 
-	for (i = 0; i < drm_format_num_planes(mode_cmd->pixel_format); i++) {
+	for (i = 0; i < info->num_planes; i++) {
+		unsigned int height = (i == 0) ? mode_cmd->height :
+				     DIV_ROUND_UP(mode_cmd->height, info->vsub);
+		unsigned long size = height * mode_cmd->pitches[i] +
+				     mode_cmd->offsets[i];
+
 		obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[i]);
 		if (!obj) {
 			DRM_ERROR("failed to lookup gem object\n");
@@ -160,6 +166,12 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		}
 
 		exynos_gem[i] = to_exynos_gem(obj);
+
+		if (size > exynos_gem[i]->size) {
+			i++;
+			ret = -EINVAL;
+			goto err;
+		}
 	}
 
 	fb = exynos_drm_framebuffer_init(dev, mode_cmd, exynos_gem, i);

From e718fe450e616227b74d27a233cdf37b4df0c82b Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 3 Aug 2017 22:54:48 +0200
Subject: [PATCH 202/281] net/mlx4_en: don't set CHECKSUM_COMPLETE on SCTP
 packets

if the NIC fails to validate the checksum on TCP/UDP, and validation of IP
checksum is successful, the driver subtracts the pseudo-header checksum
from the value obtained by the hardware and sets CHECKSUM_COMPLETE. Don't
do that if protocol is IPPROTO_SCTP, otherwise CRC32c validation fails.

V2: don't test MLX4_CQE_STATUS_IPV6 if MLX4_CQE_STATUS_IPV4 is set

Reported-by: Shuang Li <shuali@redhat.com>
Fixes: f8c6455bb04b ("net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 29 ++++++++++++++--------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 436f7689a032..bf1638044a7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -574,16 +574,21 @@ static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum,
  * header, the HW adds it. To address that, we are subtracting the pseudo
  * header checksum from the checksum value provided by the HW.
  */
-static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
-				struct iphdr *iph)
+static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
+			       struct iphdr *iph)
 {
 	__u16 length_for_csum = 0;
 	__wsum csum_pseudo_header = 0;
+	__u8 ipproto = iph->protocol;
+
+	if (unlikely(ipproto == IPPROTO_SCTP))
+		return -1;
 
 	length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
 	csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-						length_for_csum, iph->protocol, 0);
+						length_for_csum, ipproto, 0);
 	skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
+	return 0;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -594,17 +599,20 @@ static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
 static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
 			       struct ipv6hdr *ipv6h)
 {
+	__u8 nexthdr = ipv6h->nexthdr;
 	__wsum csum_pseudo_hdr = 0;
 
-	if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
-		     ipv6h->nexthdr == IPPROTO_HOPOPTS))
+	if (unlikely(nexthdr == IPPROTO_FRAGMENT ||
+		     nexthdr == IPPROTO_HOPOPTS ||
+		     nexthdr == IPPROTO_SCTP))
 		return -1;
-	hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
+	hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr));
 
 	csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
 				       sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
 	csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
-	csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr));
+	csum_pseudo_hdr = csum_add(csum_pseudo_hdr,
+				   (__force __wsum)htons(nexthdr));
 
 	skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
 	skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
@@ -627,11 +635,10 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
 	}
 
 	if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4))
-		get_fixed_ipv4_csum(hw_checksum, skb, hdr);
+		return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 #if IS_ENABLED(CONFIG_IPV6)
-	else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
-		if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
-			return -1;
+	if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
+		return get_fixed_ipv6_csum(hw_checksum, skb, hdr);
 #endif
 	return 0;
 }

From 8e6f1521ec431dbade73f57e21e5dc46eaae50ba Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 7 Aug 2017 16:20:49 +0200
Subject: [PATCH 203/281] net: dsa: mediatek: add adjust link support for user
 ports

Manually adjust the port settings of user ports once PHY polling has
completed. This patch extends the adjust_link callback to configure the
per port PMCR register, applying the proper values polled from the PHY.
Without this patch flow control was not always getting setup properly.

Signed-off-by: Shashidhar Lakkavalli <shashidhar.lakkavalli@openmesh.com>
Signed-off-by: Muciri Gatimu <muciri@openmesh.com>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 38 ++++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mt7530.h |  1 +
 2 files changed, 39 insertions(+)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 1e46418a3b74..264b281eb86b 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -625,6 +625,44 @@ static void mt7530_adjust_link(struct dsa_switch *ds, int port,
 		 * all finished.
 		 */
 		mt7623_pad_clk_setup(ds);
+	} else {
+		u16 lcl_adv = 0, rmt_adv = 0;
+		u8 flowctrl;
+		u32 mcr = PMCR_USERP_LINK | PMCR_FORCE_MODE;
+
+		switch (phydev->speed) {
+		case SPEED_1000:
+			mcr |= PMCR_FORCE_SPEED_1000;
+			break;
+		case SPEED_100:
+			mcr |= PMCR_FORCE_SPEED_100;
+			break;
+		};
+
+		if (phydev->link)
+			mcr |= PMCR_FORCE_LNK;
+
+		if (phydev->duplex) {
+			mcr |= PMCR_FORCE_FDX;
+
+			if (phydev->pause)
+				rmt_adv = LPA_PAUSE_CAP;
+			if (phydev->asym_pause)
+				rmt_adv |= LPA_PAUSE_ASYM;
+
+			if (phydev->advertising & ADVERTISED_Pause)
+				lcl_adv |= ADVERTISE_PAUSE_CAP;
+			if (phydev->advertising & ADVERTISED_Asym_Pause)
+				lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+			flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+
+			if (flowctrl & FLOW_CTRL_TX)
+				mcr |= PMCR_TX_FC_EN;
+			if (flowctrl & FLOW_CTRL_RX)
+				mcr |= PMCR_RX_FC_EN;
+		}
+		mt7530_write(priv, MT7530_PMCR_P(port), mcr);
 	}
 }
 
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index b83d76b99802..74db9822eb40 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -151,6 +151,7 @@ enum mt7530_stp_state {
 #define  PMCR_TX_FC_EN			BIT(5)
 #define  PMCR_RX_FC_EN			BIT(4)
 #define  PMCR_FORCE_SPEED_1000		BIT(3)
+#define  PMCR_FORCE_SPEED_100		BIT(2)
 #define  PMCR_FORCE_FDX			BIT(1)
 #define  PMCR_FORCE_LNK			BIT(0)
 #define  PMCR_COMMON_LINK		(PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \

From ec0acb09313074ba1a4976945791d9c6815f39fb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 8 Aug 2017 15:25:25 +0800
Subject: [PATCH 204/281] net: sched: set xt_tgchk_param par.net properly in
 ipt_init_target

Now xt_tgchk_param par in ipt_init_target is a local varibale,
par.net is not initialized there. Later when xt_check_target
calls target's checkentry in which it may access par.net, it
would cause kernel panic.

Jaroslav found this panic when running:

  # ip link add TestIface type dummy
  # tc qd add dev TestIface ingress handle ffff:
  # tc filter add dev TestIface parent ffff: u32 match u32 0 0 \
    action xt -j CONNMARK --set-mark 4

This patch is to pass net param into ipt_init_target and set
par.net with it properly in there.

v1->v2:
  As Wang Cong pointed, I missed ipt_net_id != xt_net_id, so fix
  it by also passing net_id to __tcf_ipt_init.
v2->v3:
  Missed the fixes tag, so add it.

Fixes: ecb2421b5ddf ("netfilter: add and use nf_ct_netns_get/put")
Reported-by: Jaroslav Aster <jaster@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 36f0ced9e60c..94ba5cfab860 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -36,8 +36,8 @@ static struct tc_action_ops act_ipt_ops;
 static unsigned int xt_net_id;
 static struct tc_action_ops act_xt_ops;
 
-static int ipt_init_target(struct xt_entry_target *t, char *table,
-			   unsigned int hook)
+static int ipt_init_target(struct net *net, struct xt_entry_target *t,
+			   char *table, unsigned int hook)
 {
 	struct xt_tgchk_param par;
 	struct xt_target *target;
@@ -49,6 +49,7 @@ static int ipt_init_target(struct xt_entry_target *t, char *table,
 		return PTR_ERR(target);
 
 	t->u.kernel.target = target;
+	par.net       = net;
 	par.table     = table;
 	par.entryinfo = NULL;
 	par.target    = target;
@@ -91,10 +92,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 	[TCA_IPT_TARG]	= { .len = sizeof(struct xt_entry_target) },
 };
 
-static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
+static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
 			  const struct tc_action_ops *ops, int ovr, int bind)
 {
+	struct tc_action_net *tn = net_generic(net, id);
 	struct nlattr *tb[TCA_IPT_MAX + 1];
 	struct tcf_ipt *ipt;
 	struct xt_entry_target *td, *t;
@@ -159,7 +161,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
 	if (unlikely(!t))
 		goto err2;
 
-	err = ipt_init_target(t, tname, hook);
+	err = ipt_init_target(net, t, tname, hook);
 	if (err < 0)
 		goto err3;
 
@@ -193,18 +195,16 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
 			int bind)
 {
-	struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
-	return __tcf_ipt_init(tn, nla, est, a, &act_ipt_ops, ovr, bind);
+	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
+			      bind);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
 		       int bind)
 {
-	struct tc_action_net *tn = net_generic(net, xt_net_id);
-
-	return __tcf_ipt_init(tn, nla, est, a, &act_xt_ops, ovr, bind);
+	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
+			      bind);
 }
 
 static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,

From 8ba60924710cde564a3905588b6219741d6356d0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Aug 2017 01:41:58 -0700
Subject: [PATCH 205/281] tcp: fastopen: tcp_connect() must refresh the route

With new TCP_FASTOPEN_CONNECT socket option, there is a possibility
to call tcp_connect() while socket sk_dst_cache is either NULL
or invalid.

 +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
 +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
 +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
 +0 connect(4, ..., ...) = 0

<< sk->sk_dst_cache becomes obsolete, or even set to NULL >>

 +1 sendto(4, ..., 1000, MSG_FASTOPEN, ..., ...) = 1000

We need to refresh the route otherwise bad things can happen,
especially when syzkaller is running on the host :/

Fixes: 19f6d3f3c8422 ("net/tcp-fastopen: Add new API support")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 276406a83a37..b7661a68d498 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3436,6 +3436,10 @@ int tcp_connect(struct sock *sk)
 	int err;
 
 	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+		return -EHOSTUNREACH; /* Routing failure or similar. */
+
 	tcp_connect_init(sk);
 
 	if (unlikely(tp->repair)) {

From 05bfd7dbb53a10be4a3e7aebaeec04b558198d49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=A5kon=20Bugge?= <Haakon.Bugge@oracle.com>
Date: Tue, 8 Aug 2017 11:13:32 +0200
Subject: [PATCH 206/281] rds: Reintroduce statistics counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 7e3f2952eeb1 ("rds: don't let RDS shutdown a connection
while senders are present"), refilling the receive queue was removed
from rds_ib_recv(), along with the increment of
s_ib_rx_refill_from_thread.

Commit 73ce4317bf98 ("RDS: make sure we post recv buffers")
re-introduces filling the receive queue from rds_ib_recv(), but does
not add the statistics counter. rds_ib_recv() was later renamed to
rds_ib_recv_path().

This commit reintroduces the statistics counting of
s_ib_rx_refill_from_thread and s_ib_rx_refill_from_cq.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Knut Omang <knut.omang@oracle.com>
Reviewed-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_recv.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index e10624aa6959..9722bf839d9d 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1015,8 +1015,10 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
 	if (rds_ib_ring_empty(&ic->i_recv_ring))
 		rds_ib_stats_inc(s_ib_rx_ring_empty);
 
-	if (rds_ib_ring_low(&ic->i_recv_ring))
+	if (rds_ib_ring_low(&ic->i_recv_ring)) {
 		rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
+		rds_ib_stats_inc(s_ib_rx_refill_from_cq);
+	}
 }
 
 int rds_ib_recv_path(struct rds_conn_path *cp)
@@ -1029,6 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
 	if (rds_conn_up(conn)) {
 		rds_ib_attempt_ack(ic);
 		rds_ib_recv_refill(conn, 0, GFP_KERNEL);
+		rds_ib_stats_inc(s_ib_rx_refill_from_thread);
 	}
 
 	return ret;

From 0a0e1a85c83775a648041be2b15de6d0a2f2b8eb Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Tue, 8 Aug 2017 11:43:24 +0200
Subject: [PATCH 207/281] ppp: fix xmit recursion detection on ppp channels

Commit e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp
devices") dropped the xmit_recursion counter incrementation in
ppp_channel_push() and relied on ppp_xmit_process() for this task.
But __ppp_channel_push() can also send packets directly (using the
.start_xmit() channel callback), in which case the xmit_recursion
counter isn't incremented anymore. If such packets get routed back to
the parent ppp unit, ppp_xmit_process() won't notice the recursion and
will call ppp_channel_push() on the same channel, effectively creating
the deadlock situation that the xmit_recursion mechanism was supposed
to prevent.

This patch re-introduces the xmit_recursion counter incrementation in
ppp_channel_push(). Since the xmit_recursion variable is now part of
the parent ppp unit, incrementation is skipped if the channel doesn't
have any. This is fine because only packets routed through the parent
unit may enter the channel recursively.

Finally, we have to ensure that pch->ppp is not going to be modified
while executing ppp_channel_push(). Instead of taking this lock only
while calling ppp_xmit_process(), we now have to hold it for the full
ppp_channel_push() execution. This respects the ppp locks ordering
which requires locking ->upl before ->downl.

Fixes: e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index bd4303944e44..a404552555d4 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1915,21 +1915,23 @@ static void __ppp_channel_push(struct channel *pch)
 	spin_unlock(&pch->downl);
 	/* see if there is anything from the attached unit to be sent */
 	if (skb_queue_empty(&pch->file.xq)) {
-		read_lock(&pch->upl);
 		ppp = pch->ppp;
 		if (ppp)
-			ppp_xmit_process(ppp);
-		read_unlock(&pch->upl);
+			__ppp_xmit_process(ppp);
 	}
 }
 
 static void ppp_channel_push(struct channel *pch)
 {
-	local_bh_disable();
-
-	__ppp_channel_push(pch);
-
-	local_bh_enable();
+	read_lock_bh(&pch->upl);
+	if (pch->ppp) {
+		(*this_cpu_ptr(pch->ppp->xmit_recursion))++;
+		__ppp_channel_push(pch);
+		(*this_cpu_ptr(pch->ppp->xmit_recursion))--;
+	} else {
+		__ppp_channel_push(pch);
+	}
+	read_unlock_bh(&pch->upl);
 }
 
 /*

From bbae08e592706dc32e5c7c97827b13c1c178668b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Tue, 8 Aug 2017 18:02:11 +0200
Subject: [PATCH 208/281] qmi_wwan: fix NULL deref on disconnect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

qmi_wwan_disconnect is called twice when disconnecting devices with
separate control and data interfaces.  The first invocation will set
the interface data to NULL for both interfaces to flag that the
disconnect has been handled.  But the matching NULL check was left
out when qmi_wwan_disconnect was added, resulting in this oops:

  usb 2-1.4: USB disconnect, device number 4
  qmi_wwan 2-1.4:1.6 wwp0s29u1u4i6: unregister 'qmi_wwan' usb-0000:00:1d.0-1.4, WWAN/QMI device
  BUG: unable to handle kernel NULL pointer dereference at 00000000000000e0
  IP: qmi_wwan_disconnect+0x25/0xc0 [qmi_wwan]
  PGD 0
  P4D 0
  Oops: 0000 [#1] SMP
  Modules linked in: <stripped irrelevant module list>
  CPU: 2 PID: 33 Comm: kworker/2:1 Tainted: G            E   4.12.3-nr44-normandy-r1500619820+ #1
  Hardware name: LENOVO 4291LR7/4291LR7, BIOS CBET4000 4.6-810-g50522254fb 07/21/2017
  Workqueue: usb_hub_wq hub_event [usbcore]
  task: ffff8c882b716040 task.stack: ffffb8e800d84000
  RIP: 0010:qmi_wwan_disconnect+0x25/0xc0 [qmi_wwan]
  RSP: 0018:ffffb8e800d87b38 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: ffff8c8824f3f1d0 RDI: ffff8c8824ef6400
  RBP: ffff8c8824ef6400 R08: 0000000000000000 R09: 0000000000000000
  R10: ffffb8e800d87780 R11: 0000000000000011 R12: ffffffffc07ea0e8
  R13: ffff8c8824e2e000 R14: ffff8c8824e2e098 R15: 0000000000000000
  FS:  0000000000000000(0000) GS:ffff8c8835300000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000000000e0 CR3: 0000000229ca5000 CR4: 00000000000406e0
  Call Trace:
   ? usb_unbind_interface+0x71/0x270 [usbcore]
   ? device_release_driver_internal+0x154/0x210
   ? qmi_wwan_unbind+0x6d/0xc0 [qmi_wwan]
   ? usbnet_disconnect+0x6c/0xf0 [usbnet]
   ? qmi_wwan_disconnect+0x87/0xc0 [qmi_wwan]
   ? usb_unbind_interface+0x71/0x270 [usbcore]
   ? device_release_driver_internal+0x154/0x210

Reported-and-tested-by: Nathaniel Roach <nroach44@gmail.com>
Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support")
Cc: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index ff6f39fe6c00..8c3733608271 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1341,10 +1341,14 @@ static int qmi_wwan_probe(struct usb_interface *intf,
 static void qmi_wwan_disconnect(struct usb_interface *intf)
 {
 	struct usbnet *dev = usb_get_intfdata(intf);
-	struct qmi_wwan_state *info = (void *)&dev->data;
+	struct qmi_wwan_state *info;
 	struct list_head *iter;
 	struct net_device *ldev;
 
+	/* called twice if separate control and data intf */
+	if (!dev)
+		return;
+	info = (void *)&dev->data;
 	if (info->flags & QMI_WWAN_FLAG_MUX) {
 		if (!rtnl_trylock()) {
 			restart_syscall();

From 8d63bee643f1fb53e472f0e135cae4eb99d62d19 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 8 Aug 2017 14:22:55 -0400
Subject: [PATCH 209/281] net: avoid skb_warn_bad_offload false positives on
 UFO

skb_warn_bad_offload triggers a warning when an skb enters the GSO
stack at __skb_gso_segment that does not have CHECKSUM_PARTIAL
checksum offload set.

Commit b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
observed that SKB_GSO_DODGY producers can trigger the check and
that passing those packets through the GSO handlers will fix it
up. But, the software UFO handler will set ip_summed to
CHECKSUM_NONE.

When __skb_gso_segment is called from the receive path, this
triggers the warning again.

Make UFO set CHECKSUM_UNNECESSARY instead of CHECKSUM_NONE. On
Tx these two are equivalent. On Rx, this better matches the
skb state (checksum computed), as CHECKSUM_NONE here means no
checksum computed.

See also this thread for context:
http://patchwork.ozlabs.org/patch/799015/

Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c         | 2 +-
 net/ipv4/udp_offload.c | 2 +-
 net/ipv6/udp_offload.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8515f8fe0460..ce15a06d5558 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2739,7 +2739,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
 {
 	if (tx_path)
 		return skb->ip_summed != CHECKSUM_PARTIAL &&
-		       skb->ip_summed != CHECKSUM_NONE;
+		       skb->ip_summed != CHECKSUM_UNNECESSARY;
 
 	return skb->ip_summed == CHECKSUM_NONE;
 }
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 781250151d40..0932c85b42af 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -235,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	if (uh->check == 0)
 		uh->check = CSUM_MANGLED_0;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	/* If there is no outer header we can fake a checksum offload
 	 * due to the fact that we have already done the checksum in
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a2267f80febb..e7d378c032cb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		if (uh->check == 0)
 			uh->check = CSUM_MANGLED_0;
 
-		skb->ip_summed = CHECKSUM_NONE;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 		/* If there is no outer header we can fake a checksum offload
 		 * due to the fact that we have already done the checksum in

From 3f8b23a09a87aa65df3e13129cb2d9cffcb394db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Wed, 9 Aug 2017 01:48:59 +0200
Subject: [PATCH 210/281] mmc: block: fix lockdep splat when removing mmc_block
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix lockdep splat introduced in v4.13-rc4.

[  266.297226] ------------[ cut here ]------------
[  266.300078] WARNING: CPU: 2 PID: 176 at /mnt/src/jaja/git/tf300t/include/linux/blkdev.h:657 mmc_blk_remove_req+0xd0/0xe8 [mmc_block]
[  266.302937] Modules linked in: mmc_block(-) sdhci_tegra sdhci_pltfm sdhci pwrseq_simple pwrseq_emmc mmc_core
[  266.305941] CPU: 2 PID: 176 Comm: rmmod Tainted: G        W       4.13.0-rc4mq-00208-gb691e67724b8-dirty #694
[  266.308852] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
[  266.311719] [<b011144c>] (unwind_backtrace) from [<b010ca54>] (show_stack+0x18/0x1c)
[  266.314664] [<b010ca54>] (show_stack) from [<b062e3f4>] (dump_stack+0x84/0x98)
[  266.317644] [<b062e3f4>] (dump_stack) from [<b01214f4>] (__warn+0xf4/0x10c)
[  266.320542] [<b01214f4>] (__warn) from [<b01215d4>] (warn_slowpath_null+0x28/0x30)
[  266.323534] [<b01215d4>] (warn_slowpath_null) from [<af067858>] (mmc_blk_remove_req+0xd0/0xe8 [mmc_block])
[  266.326568] [<af067858>] (mmc_blk_remove_req [mmc_block]) from [<af068f40>] (mmc_blk_remove_parts.constprop.6+0x50/0x64 [mmc_block])
[  266.329678] [<af068f40>] (mmc_blk_remove_parts.constprop.6 [mmc_block]) from [<af0693b8>] (mmc_blk_remove+0x24/0x140 [mmc_block])
[  266.332894] [<af0693b8>] (mmc_blk_remove [mmc_block]) from [<af0052ec>] (mmc_bus_remove+0x20/0x28 [mmc_core])
[  266.336198] [<af0052ec>] (mmc_bus_remove [mmc_core]) from [<b046aa64>] (device_release_driver_internal+0x164/0x200)
[  266.339367] [<b046aa64>] (device_release_driver_internal) from [<b046ab54>] (driver_detach+0x40/0x74)
[  266.342537] [<b046ab54>] (driver_detach) from [<b046982c>] (bus_remove_driver+0x68/0xdc)
[  266.345660] [<b046982c>] (bus_remove_driver) from [<af06ad40>] (mmc_blk_exit+0xc/0x2cc [mmc_block])
[  266.348875] [<af06ad40>] (mmc_blk_exit [mmc_block]) from [<b01aee30>] (SyS_delete_module+0x1c4/0x254)
[  266.352068] [<b01aee30>] (SyS_delete_module) from [<b0108480>] (ret_fast_syscall+0x0/0x34)
[  266.355308] ---[ end trace f68728a0d3053b72 ]---

Fixes: 7c84b8b43d3d ("mmc: block: bypass the queue even if usage is present for hotplug")
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index e5938c791330..f1bbfd389367 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2170,7 +2170,9 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 		 * from being accepted.
 		 */
 		card = md->queue.card;
+		spin_lock_irq(md->queue.queue->queue_lock);
 		queue_flag_set(QUEUE_FLAG_BYPASS, md->queue.queue);
+		spin_unlock_irq(md->queue.queue->queue_lock);
 		blk_set_queue_dying(md->queue.queue);
 		mmc_cleanup_queue(&md->queue);
 		if (md->disk->flags & GENHD_FL_UP) {

From 7310d5c8c55e8987a854507f71022f8de676bbf4 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 9 Aug 2017 20:57:55 +1000
Subject: [PATCH 211/281] powerpc/configs: Re-enable HARD/SOFT lockup detectors

In commit 05a4a9527931 ("kernel/watchdog: split up config options"),
CONFIG_LOCKUP_DETECTOR was split into two separate config options,
CONFIG_HARDLOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR.

Our defconfigs still have CONFIG_LOCKUP_DETECTOR=y, but that is no longer
user selectable, and we don't mention the new options, so we end up with
none of them enabled.

So update the defconfigs to turn on the new SOFT and HARD options, the
end result being the same as what we had previously.

Fixes: 05a4a9527931 ("kernel/watchdog: split up config options")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/powernv_defconfig | 3 ++-
 arch/powerpc/configs/ppc64_defconfig   | 3 ++-
 arch/powerpc/configs/pseries_defconfig | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 0695ce047d56..34fc9bbfca9e 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 5175028c56ce..c5246d29f385 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1a61aa20dfba..fd5d98a0b95c 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y

From 0459ddfdb31e7d812b555a2530ecbacdf96961a6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:21 +1000
Subject: [PATCH 212/281] powerpc: NMI IPI improve lock primitive

When the NMI IPI lock is contended, spin at low SMT priority, using
loads only, and with interrupts enabled (where possible). This
improves behaviour under high contention (e.g., a system crash when
a number of CPUs are trying to enter the debugger).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cf0e1245b8cc..8d3320562c70 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 	hard_irq_disable();
 	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
 		raw_local_irq_restore(*flags);
-		cpu_relax();
+		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 		raw_local_irq_save(*flags);
 		hard_irq_disable();
 	}
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 static void nmi_ipi_lock(void)
 {
 	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
-		cpu_relax();
+		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 }
 
 static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 	nmi_ipi_lock_start(&flags);
 	while (nmi_ipi_busy_count) {
 		nmi_ipi_unlock_end(&flags);
-		cpu_relax();
+		spin_until_cond(nmi_ipi_busy_count == 0);
 		nmi_ipi_lock_start(&flags);
 	}
 

From d8e2a4053574002135fbb032c2e74f1d1dbb2103 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:22 +1000
Subject: [PATCH 213/281] powerpc/watchdog: Improve watchdog lock primitive

- Hard-disable interrupts before taking the lock, which prevents
  soft-NMI re-entrancy and therefore can prevent deadlocks.
- Use raw_ variants of local_irq_disable to avoid irq debugging.
- When the lock is contended, spin at low SMT priority, using
  loads only, and with interrupts enabled (where possible).

Some stalls have been noticed at high loads that go away with improved
locking. There should not be so much locking contention in the first
place (which is addressed in a subsequent patch), but locking should
still be improved.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b03a32d..fda4d044d326 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
 	 * This may be called from low level interrupt handlers at some
 	 * point in future.
 	 */
-	local_irq_save(*flags);
-	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
-		cpu_relax();
+	raw_local_irq_save(*flags);
+	hard_irq_disable(); /* Make it soft-NMI safe */
+	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+		raw_local_irq_restore(*flags);
+		spin_until_cond(!test_bit(0, &__wd_smp_lock));
+		raw_local_irq_save(*flags);
+		hard_irq_disable();
+	}
 }
 
 static inline void wd_smp_unlock(unsigned long *flags)
 {
 	clear_bit_unlock(0, &__wd_smp_lock);
-	local_irq_restore(*flags);
+	raw_local_irq_restore(*flags);
 }
 
 static void wd_lockup_ipi(struct pt_regs *regs)

From 26c5c6e129ee725f103938262a034861ada467ae Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:23 +1000
Subject: [PATCH 214/281] powerpc/watchdog: Moderate touch_nmi_watchdog
 overhead

Some code can go into a tight loop calling touch_nmi_watchdog (e.g.,
stop_machine CPU hotplug code). This can cause contention on watchdog
locks particularly if all CPUs with watchdog enabled are spinning in
the loops.

Avoid this storm of activity by running the watchdog timer callback
from this path if we have exceeded the timer period since it was last
run.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index fda4d044d326..426dd34891d6 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -263,9 +263,11 @@ static void wd_timer_fn(unsigned long data)
 
 void arch_touch_nmi_watchdog(void)
 {
+	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
 	int cpu = smp_processor_id();
 
-	watchdog_timer_interrupt(cpu);
+	if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+		watchdog_timer_interrupt(cpu);
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 

From 8e23692175ad465628b8c86c1acc154fecad97be Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:24 +1000
Subject: [PATCH 215/281] powerpc/watchdog: Fix final-check recovered case

When the watchdog decides to panic, it takes the lock and double
checks everything (to avoid races with the CPU being unstuck or
panic()ed by something else).

The exit label was misplaced and would result in all-CPUs backtrace
and watchdog panic even in the case that the condition was found to be
resolved.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 426dd34891d6..7d16dafa1bb6 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -144,7 +144,6 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 	for_each_cpu(c, &wd_smp_cpus_pending)
 		set_cpu_stuck(c, tb);
 
-out:
 	wd_smp_unlock(&flags);
 
 	printk_safe_flush();
@@ -157,6 +156,11 @@ out:
 
 	if (hardlockup_panic)
 		nmi_panic(NULL, "Hard LOCKUP");
+
+	return;
+
+out:
+	wd_smp_unlock(&flags);
 }
 
 static void wd_smp_clear_cpu_pending(int cpu, u64 tb)

From 87607a30be92f1ecee3af6f4a5779e179db98118 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:25 +1000
Subject: [PATCH 216/281] powerpc/watchdog: Fix marking of stuck CPUs

When the SMP detector finds other CPUs stuck, it iterates over
them and marks them as stuck. This pulls them out of the pending
mask and allows the detector to continue with remaining good
CPUs (if nmi_watchdog=panic is not enabled).

The code to dothat was buggy because when setting a CPU stuck,
if the pending mask became empty, it resets it to keep the
watchdog running. However the iterator will continue to run
over the new pending mask and mark remaining good CPUs sas stuck.

Fix this by doing it with cpumask bitwise operations.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 7d16dafa1bb6..12e90ae712fe 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -101,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
 		nmi_panic(regs, "Hard LOCKUP");
 }
 
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
 {
-	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
-	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+	cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+	cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
 	if (cpumask_empty(&wd_smp_cpus_pending)) {
 		wd_smp_last_reset_tb = tb;
 		cpumask_andnot(&wd_smp_cpus_pending,
@@ -112,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
 				&wd_smp_cpus_stuck);
 	}
 }
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+	set_cpumask_stuck(cpumask_of(cpu), tb);
+}
 
 static void watchdog_smp_panic(int cpu, u64 tb)
 {
@@ -140,9 +144,8 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 	}
 	smp_flush_nmi_ipi(1000000);
 
-	/* Take the stuck CPU out of the watch group */
-	for_each_cpu(c, &wd_smp_cpus_pending)
-		set_cpu_stuck(c, tb);
+	/* Take the stuck CPUs out of the watch group */
+	set_cpumask_stuck(&wd_smp_cpus_pending, tb);
 
 	wd_smp_unlock(&flags);
 

From 96ea91e7b6ee2c406598d859e7348b4829404eea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 9 Aug 2017 22:41:26 +1000
Subject: [PATCH 217/281] powerpc/watchdog: add locking around init/exit
 functions

When CPUs start and stop the watchdog, they manipulate shared data
that is normally protected by the lock. Other CPUs can be running
concurrently at this time, so it's a good idea to use locking here
to be on the safe side.

Remove the barrier which is undocumented and didn't do anything.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/watchdog.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 12e90ae712fe..34721a257a77 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -297,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
 
 static int start_wd_on_cpu(unsigned int cpu)
 {
+	unsigned long flags;
+
 	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
 		WARN_ON(1);
 		return 0;
@@ -311,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
 	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
 		return 0;
 
+	wd_smp_lock(&flags);
 	cpumask_set_cpu(cpu, &wd_cpus_enabled);
 	if (cpumask_weight(&wd_cpus_enabled) == 1) {
 		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
 		wd_smp_last_reset_tb = get_tb();
 	}
-	smp_wmb();
+	wd_smp_unlock(&flags);
+
 	start_watchdog_timer_on(cpu);
 
 	return 0;
@@ -324,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu)
 
 static int stop_wd_on_cpu(unsigned int cpu)
 {
+	unsigned long flags;
+
 	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
 		return 0; /* Can happen in CPU unplug case */
 
 	stop_watchdog_timer_on(cpu);
 
+	wd_smp_lock(&flags);
 	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+	wd_smp_unlock(&flags);
+
 	wd_smp_clear_cpu_pending(cpu, get_tb());
 
 	return 0;

From c0ca0e5934b3bd70d456b363a6a989a3b4a692f1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 8 Aug 2017 21:39:28 -0400
Subject: [PATCH 218/281] NFSv4: Ignore NFS4ERR_OLD_STATEID in
 nfs41_check_open_stateid()

If the call to TEST_STATEID returns NFS4ERR_OLD_STATEID, then it just
means we raced with other calls to OPEN.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3e46be8c9be1..cbf9cdd48a3b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2553,9 +2553,8 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
 		clear_bit(NFS_O_RDWR_STATE, &state->flags);
 		clear_bit(NFS_OPEN_STATE, &state->flags);
 		stateid->type = NFS4_INVALID_STATEID_TYPE;
-	}
-	if (status != NFS_OK)
 		return status;
+	}
 	if (nfs_open_stateid_recover_openmode(state))
 		return -NFS4ERR_OPENMODE;
 	return NFS_OK;

From 48fb6f4db940e92cfb16cd878cddd59ea6120d06 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 9 Aug 2017 08:27:11 +0100
Subject: [PATCH 219/281] futex: Remove unnecessary warning from get_futex_key

Commit 65d8fc777f6d ("futex: Remove requirement for lock_page() in
get_futex_key()") removed an unnecessary lock_page() with the
side-effect that page->mapping needed to be treated very carefully.

Two defensive warnings were added in case any assumption was missed and
the first warning assumed a correct application would not alter a
mapping backing a futex key.  Since merging, it has not triggered for
any unexpected case but Mark Rutland reported the following bug
triggering due to the first warning.

  kernel BUG at kernel/futex.c:679!
  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 0 PID: 3695 Comm: syz-executor1 Not tainted 4.13.0-rc3-00020-g307fec773ba3 #3
  Hardware name: linux,dummy-virt (DT)
  task: ffff80001e271780 task.stack: ffff000010908000
  PC is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679
  LR is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679
  pc : [<ffff00000821ac14>] lr : [<ffff00000821ac14>] pstate: 80000145

The fact that it's a bug instead of a warning was due to an unrelated
arm64 problem, but the warning itself triggered because the underlying
mapping changed.

This is an application issue but from a kernel perspective it's a
recoverable situation and the warning is unnecessary so this patch
removes the warning.  The warning may potentially be triggered with the
following test program from Mark although it may be necessary to adjust
NR_FUTEX_THREADS to be a value smaller than the number of CPUs in the
system.

    #include <linux/futex.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <sys/time.h>
    #include <unistd.h>

    #define NR_FUTEX_THREADS 16
    pthread_t threads[NR_FUTEX_THREADS];

    void *mem;

    #define MEM_PROT  (PROT_READ | PROT_WRITE)
    #define MEM_SIZE  65536

    static int futex_wrapper(int *uaddr, int op, int val,
                             const struct timespec *timeout,
                             int *uaddr2, int val3)
    {
        syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
    }

    void *poll_futex(void *unused)
    {
        for (;;) {
            futex_wrapper(mem, FUTEX_CMP_REQUEUE_PI, 1, NULL, mem + 4, 1);
        }
    }

    int main(int argc, char *argv[])
    {
        int i;

        mem = mmap(NULL, MEM_SIZE, MEM_PROT,
               MAP_SHARED | MAP_ANONYMOUS, -1, 0);

        printf("Mapping @ %p\n", mem);

        printf("Creating futex threads...\n");

        for (i = 0; i < NR_FUTEX_THREADS; i++)
            pthread_create(&threads[i], NULL, poll_futex, NULL);

        printf("Flipping mapping...\n");
        for (;;) {
            mmap(mem, MEM_SIZE, MEM_PROT,
                 MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        }

        return 0;
    }

Reported-and-tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/futex.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 16dbe4c93895..f50b434756c1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -670,13 +670,14 @@ again:
 		 * this reference was taken by ihold under the page lock
 		 * pinning the inode in place so i_lock was unnecessary. The
 		 * only way for this check to fail is if the inode was
-		 * truncated in parallel so warn for now if this happens.
+		 * truncated in parallel which is almost certainly an
+		 * application bug. In such a case, just retry.
 		 *
 		 * We are not calling into get_futex_key_refs() in file-backed
 		 * cases, therefore a successful atomic_inc return below will
 		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
 		 */
-		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+		if (!atomic_inc_not_zero(&inode->i_count)) {
 			rcu_read_unlock();
 			put_page(page);
 

From 92f190aba2d57c91ef99e227e36461234c35eb7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Wed, 9 Aug 2017 01:42:22 +0200
Subject: [PATCH 220/281] drm: make DRM_STM default n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default config value for all other drivers is N.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/stm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig
index 2c4817fb0890..8fe5b184b4e8 100644
--- a/drivers/gpu/drm/stm/Kconfig
+++ b/drivers/gpu/drm/stm/Kconfig
@@ -7,7 +7,6 @@ config DRM_STM
 	select DRM_PANEL
 	select VIDEOMODE_HELPERS
 	select FB_PROVIDE_GET_FB_UNMAPPED_AREA
-	default y
 
 	help
 	  Enable support for the on-chip display controller on

From 372aa73e20197d463fc8b34524c20b089a98b1c3 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 10 Aug 2017 11:32:18 +1000
Subject: [PATCH 221/281] drm/nouveau/disp/nv04: avoid creation of output paths

Fixes hitting WARN_ON() during initialisation of pre-NV50 GPUs, caused
by the recent changes to support pad macro routing on GM20x.

We currently don't use them here for older GPUs anyway.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index c7c84d34d97e..88582af8bd89 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -267,6 +267,8 @@ nvkm_disp_oneinit(struct nvkm_engine *engine)
 	/* Create output path objects for each VBIOS display path. */
 	i = -1;
 	while ((data = dcb_outp_parse(bios, ++i, &ver, &hdr, &dcbE))) {
+		if (ver < 0x40) /* No support for chipsets prior to NV50. */
+			break;
 		if (dcbE.type == DCB_OUTPUT_UNUSED)
 			continue;
 		if (dcbE.type == DCB_OUTPUT_EOL)

From c775d2098d35bd130a883bbdf6af9401a8c4cb2d Mon Sep 17 00:00:00 2001
From: Milan Broz <gmazyland@gmail.com>
Date: Wed, 9 Aug 2017 17:47:26 +0200
Subject: [PATCH 222/281] bio-integrity: Fix regression if profile verify_fn is
 NULL

In dm-integrity target we register integrity profile that have
both generate_fn and verify_fn callbacks set to NULL.

This is used if dm-integrity is stacked under a dm-crypt device
for authenticated encryption (integrity payload contains authentication
tag and IV seed).

In this case the verification is done through own crypto API
processing inside dm-crypt; integrity profile is only holder
of these data. (And memory is owned by dm-crypt as well.)

After the commit (and previous changes)
  Commit 7c20f11680a441df09de7235206f70115fbf6290
  Author: Christoph Hellwig <hch@lst.de>
  Date:   Mon Jul 3 16:58:43 2017 -0600

    bio-integrity: stop abusing bi_end_io

we get this crash:

: BUG: unable to handle kernel NULL pointer dereference at   (null)
: IP:   (null)
: *pde = 00000000
...
:
: Workqueue: kintegrityd bio_integrity_verify_fn
: task: f48ae180 task.stack: f4b5c000
: EIP:   (null)
: EFLAGS: 00210286 CPU: 0
: EAX: f4b5debc EBX: 00001000 ECX: 00000001 EDX: 00000000
: ESI: 00001000 EDI: ed25f000 EBP: f4b5dee8 ESP: f4b5dea4
:  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
: CR0: 80050033 CR2: 00000000 CR3: 32823000 CR4: 001406d0
: Call Trace:
:  ? bio_integrity_process+0xe3/0x1e0
:  bio_integrity_verify_fn+0xea/0x150
:  process_one_work+0x1c7/0x5c0
:  worker_thread+0x39/0x380
:  kthread+0xd6/0x110
:  ? process_one_work+0x5c0/0x5c0
:  ? kthread_worker_fn+0x100/0x100
:  ? kthread_worker_fn+0x100/0x100
:  ret_from_fork+0x19/0x24
: Code:  Bad EIP value.
: EIP:   (null) SS:ESP: 0068:f4b5dea4
: CR2: 0000000000000000

Patch just skip the whole verify workqueue if verify_fn is set to NULL.

Fixes: 7c20f116 ("bio-integrity: stop abusing bi_end_io")
Signed-off-by: Milan Broz <gmazyland@gmail.com>
[hch: trivial whitespace fix]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 83e92beb3c9f..0fd9604974da 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -387,7 +387,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  */
 bool __bio_integrity_endio(struct bio *bio)
 {
-	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
+	    bi->profile->verify_fn) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
 
 		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);

From f86e28c4dc8d475cb82ca8d018daaa1564534aad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2017 17:47:27 +0200
Subject: [PATCH 223/281] bio-integrity: only verify integrity on the lowest
 stacked driver

This gets us back to the behavior in 4.12 and earlier.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Fixes: 7c20f116 ("bio-integrity: stop abusing bi_end_io")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 0fd9604974da..9b1ea478577b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -388,11 +388,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 bool __bio_integrity_endio(struct bio *bio)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bio_integrity_payload *bip = bio_integrity(bio);
 
 	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
-	    bi->profile->verify_fn) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-
+	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
 		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bip->bip_work);
 		return false;

From d4acf3650c7c968f46ad932b9a25d1cc24cf4998 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 9 Aug 2017 11:28:06 -0700
Subject: [PATCH 224/281] block: Make blk_mq_delay_kick_requeue_list() rerun
 the queue at a quiet time

The blk_mq_delay_kick_requeue_list() function is used by the device
mapper and only by the device mapper to rerun the queue and requeue
list after a delay. This function is called once per request that
gets requeued. Modify this function such that the queue is run once
per path change event instead of once per request that is requeued.

Fixes: commit 2849450ad39d ("blk-mq: introduce blk_mq_delay_kick_requeue_list()")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 211ef367345f..535cbdf32aab 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -684,8 +684,8 @@ EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 				    unsigned long msecs)
 {
-	kblockd_schedule_delayed_work(&q->requeue_work,
-				      msecs_to_jiffies(msecs));
+	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
+				    msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 

From 6f48655facfd7f7ccfe6d252ac0fe319ab02e4dd Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 6 Aug 2017 16:10:03 -0700
Subject: [PATCH 225/281] target: Fix node_acl demo-mode + uncached dynamic
 shutdown regression

This patch fixes a generate_node_acls = 1 + cache_dynamic_acls = 0
regression, that was introduced by

  commit 01d4d673558985d9a118e1e05026633c3e2ade9b
  Author: Nicholas Bellinger <nab@linux-iscsi.org>
  Date:   Wed Dec 7 12:55:54 2016 -0800

which originally had the proper list_del_init() usage, but was
dropped during list review as it was thought unnecessary by HCH.

However, list_del_init() usage is required during the special
generate_node_acls = 1 + cache_dynamic_acls = 0 case when
transport_free_session() does a list_del(&se_nacl->acl_list),
followed by target_complete_nacl() doing the same thing.

This was manifesting as a general protection fault as reported
by Justin:

kernel: general protection fault: 0000 [#1] SMP
kernel: Modules linked in:
kernel: CPU: 0 PID: 11047 Comm: iscsi_ttx Not tainted 4.13.0-rc2.x86_64.1+ #20
kernel: Hardware name: Intel Corporation S5500BC/S5500BC, BIOS S5500.86B.01.00.0064.050520141428 05/05/2014
kernel: task: ffff88026939e800 task.stack: ffffc90007884000
kernel: RIP: 0010:target_put_nacl+0x49/0xb0
kernel: RSP: 0018:ffffc90007887d70 EFLAGS: 00010246
kernel: RAX: dead000000000200 RBX: ffff8802556ca000 RCX: 0000000000000000
kernel: RDX: dead000000000100 RSI: 0000000000000246 RDI: ffff8802556ce028
kernel: RBP: ffffc90007887d88 R08: 0000000000000001 R09: 0000000000000000
kernel: R10: ffffc90007887df8 R11: ffffea0009986900 R12: ffff8802556ce020
kernel: R13: ffff8802556ce028 R14: ffff8802556ce028 R15: ffffffff88d85540
kernel: FS:  0000000000000000(0000) GS:ffff88027fc00000(0000) knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 00007fffe36f5f94 CR3: 0000000009209000 CR4: 00000000003406f0
kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kernel: Call Trace:
kernel:  transport_free_session+0x67/0x140
kernel:  transport_deregister_session+0x7a/0xc0
kernel:  iscsit_close_session+0x92/0x210
kernel:  iscsit_close_connection+0x5f9/0x840
kernel:  iscsit_take_action_for_connection_exit+0xfe/0x110
kernel:  iscsi_target_tx_thread+0x140/0x1e0
kernel:  ? wait_woken+0x90/0x90
kernel:  kthread+0x124/0x160
kernel:  ? iscsit_thread_get_cpumask+0x90/0x90
kernel:  ? kthread_create_on_node+0x40/0x40
kernel:  ret_from_fork+0x22/0x30
kernel: Code: 00 48 89 fb 4c 8b a7 48 01 00 00 74 68 4d 8d 6c 24 08 4c
89 ef e8 e8 28 43 00 48 8b 93 20 04 00 00 48 8b 83 28 04 00 00 4c 89
ef <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 20
kernel: RIP: target_put_nacl+0x49/0xb0 RSP: ffffc90007887d70
kernel: ---[ end trace f12821adbfd46fed ]---

To address this, go ahead and use proper list_del_list() for all
cases of se_nacl->acl_list deletion.

Reported-by: Justin Maggard <jmaggard01@gmail.com>
Tested-by: Justin Maggard <jmaggard01@gmail.com>
Cc: Justin Maggard <jmaggard01@gmail.com>
Cc: stable@vger.kernel.org # 4.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_tpg.c       | 4 ++--
 drivers/target/target_core_transport.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 36913734c6bc..02e8a5d86658 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -364,7 +364,7 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl)
 	mutex_lock(&tpg->acl_node_mutex);
 	if (acl->dynamic_node_acl)
 		acl->dynamic_node_acl = 0;
-	list_del(&acl->acl_list);
+	list_del_init(&acl->acl_list);
 	mutex_unlock(&tpg->acl_node_mutex);
 
 	target_shutdown_sessions(acl);
@@ -548,7 +548,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
 	 * in transport_deregister_session().
 	 */
 	list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) {
-		list_del(&nacl->acl_list);
+		list_del_init(&nacl->acl_list);
 
 		core_tpg_wait_for_nacl_pr_ref(nacl);
 		core_free_device_list_for_node(nacl, se_tpg);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 97fed9a298bd..836d552b0385 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -466,7 +466,7 @@ static void target_complete_nacl(struct kref *kref)
 	}
 
 	mutex_lock(&se_tpg->acl_node_mutex);
-	list_del(&nacl->acl_list);
+	list_del_init(&nacl->acl_list);
 	mutex_unlock(&se_tpg->acl_node_mutex);
 
 	core_tpg_wait_for_nacl_pr_ref(nacl);
@@ -538,7 +538,7 @@ void transport_free_session(struct se_session *se_sess)
 			spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags);
 
 			if (se_nacl->dynamic_stop)
-				list_del(&se_nacl->acl_list);
+				list_del_init(&se_nacl->acl_list);
 		}
 		mutex_unlock(&se_tpg->acl_node_mutex);
 

From 3ee70591d6c47ef4c4699b3395ba96ce287db937 Mon Sep 17 00:00:00 2001
From: Jim Quigley <Jim.Quigley@oracle.com>
Date: Fri, 21 Jul 2017 09:20:15 -0400
Subject: [PATCH 226/281] sunvdc: prevent sunvdc panic when mpgroup disk added
 to guest domain

Using mpgroup to define multiple paths for a virtual disk causes multiple
virtual-device-port ports to be created for that virtual device.
Each virtual-device-port port then gets a vdisk created for it by the Linux
sunvdc driver. As mpgroup is not supported by the Linux sunvdc driver it
cannot handle multiple ports for a single vdisk, leading to a kernel panic
at startup.

This fix prevents more than one vdisk per virtual-device-port being created
until full virtual disk multipathing (mpgroup) support is implemented.

Signed-off-by: Jim Quigley <Jim.Quigley@oracle.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Aaron Young <aaron.young@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/sunvdc.c | 61 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b16ead1da58..ad9749463d4f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -875,6 +875,56 @@ static void print_version(void)
 		printk(KERN_INFO "%s", version);
 }
 
+struct vdc_check_port_data {
+	int	dev_no;
+	char	*type;
+};
+
+static int vdc_device_probed(struct device *dev, void *arg)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	struct vdc_check_port_data *port_data;
+
+	port_data = (struct vdc_check_port_data *)arg;
+
+	if ((vdev->dev_no == port_data->dev_no) &&
+	    (!(strcmp((char *)&vdev->type, port_data->type))) &&
+		dev_get_drvdata(dev)) {
+		/* This device has already been configured
+		 * by vdc_port_probe()
+		 */
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+/* Determine whether the VIO device is part of an mpgroup
+ * by locating all the virtual-device-port nodes associated
+ * with the parent virtual-device node for the VIO device
+ * and checking whether any of these nodes are vdc-ports
+ * which have already been configured.
+ *
+ * Returns true if this device is part of an mpgroup and has
+ * already been probed.
+ */
+static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
+{
+	struct vdc_check_port_data port_data;
+	struct device *dev;
+
+	port_data.dev_no = vdev->dev_no;
+	port_data.type = (char *)&vdev->type;
+
+	dev = device_find_child(vdev->dev.parent, &port_data,
+				vdc_device_probed);
+
+	if (dev)
+		return true;
+
+	return false;
+}
+
 static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 {
 	struct mdesc_handle *hp;
@@ -893,6 +943,14 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 		goto err_out_release_mdesc;
 	}
 
+	/* Check if this device is part of an mpgroup */
+	if (vdc_port_mpgroup_check(vdev)) {
+		printk(KERN_WARNING
+			"VIO: Ignoring extra vdisk port %s",
+			dev_name(&vdev->dev));
+		goto err_out_release_mdesc;
+	}
+
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
 	err = -ENOMEM;
 	if (!port) {
@@ -943,6 +1001,9 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	if (err)
 		goto err_out_free_tx_ring;
 
+	/* Note that the device driver_data is used to determine
+	 * whether the port has been probed.
+	 */
 	dev_set_drvdata(&vdev->dev, port);
 
 	mdesc_release(hp);

From ed43594aede9719e56eca72fc6a9a200c60b60e6 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Tue, 8 Aug 2017 22:23:56 +0200
Subject: [PATCH 227/281] tipc: remove premature ESTABLISH FSM event at link
 synchronization

When a link between two nodes come up, both endpoints will initially
send out a STATE message to the peer, to increase the probability that
the peer endpoint also is up when the first traffic message arrives.
Thereafter, if the establishing link is the second link between two
nodes, this first "traffic" message is a TUNNEL_PROTOCOL/SYNCH message,
helping the peer to perform initial synchronization between the two
links.

However, the initial STATE message may be lost, in which case the SYNCH
message will be the first one arriving at the peer. This should also
work, as the SYNCH message itself will be used to take up the link
endpoint before  initializing synchronization.

Unfortunately the code for this case is broken. Currently, the link is
brought up through a tipc_link_fsm_evt(ESTABLISHED) when a SYNCH
arrives, whereupon __tipc_node_link_up() is called to distribute the
link slots and take the link into traffic. But, __tipc_node_link_up() is
itself starting with a test for whether the link is up, and if true,
returns without action. Clearly, the tipc_link_fsm_evt(ESTABLISHED) call
is unnecessary, since tipc_node_link_up() is itself issuing such an
event, but also harmful, since it inhibits tipc_node_link_up() to
perform the test of its tasks, and the link endpoint in question hence
is never taken into traffic.

This problem has been exposed when we set up dual links between pre-
and post-4.4 kernels, because the former ones don't send out the
initial STATE message described above.

We fix this by removing the unnecessary event call.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index aeef8011ac7d..9b4dcb6a16b5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 	/* Initiate synch mode if applicable */
 	if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
 		syncpt = iseqno + exp_pkts - 1;
-		if (!tipc_link_is_up(l)) {
-			tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+		if (!tipc_link_is_up(l))
 			__tipc_node_link_up(n, bearer_id, xmitq);
-		}
 		if (n->state == SELF_UP_PEER_UP) {
 			n->sync_point = syncpt;
 			tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);

From 50ddfbafcd1e1f95185dfaa4ad794f742b0beaf8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 8 Aug 2017 14:45:09 -0700
Subject: [PATCH 228/281] net: systemport: Fix software statistics for
 SYSTEMPORT Lite

With SYSTEMPORT Lite we have holes in our statistics layout that make us
skip over the hardware MIB counters, bcm_sysport_get_stats() was not
taking that into account resulting in reporting 0 for all SW-maintained
statistics, fix this by skipping accordingly.

Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 5333601f855f..dc3052751bc1 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -449,6 +449,10 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 			p = (char *)&dev->stats;
 		else
 			p = (char *)priv;
+
+		if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type))
+			continue;
+
 		p += s->stat_offset;
 		data[j] = *(unsigned long *)p;
 		j++;

From 04db70d9fe7019d96118158fbaaccb4959ba2bd4 Mon Sep 17 00:00:00 2001
From: Girish Moodalbail <girish.moodalbail@oracle.com>
Date: Tue, 8 Aug 2017 17:26:24 -0700
Subject: [PATCH 229/281] geneve: maximum value of VNI cannot be used

Geneve's Virtual Network Identifier (VNI) is 24 bit long, so the range
of values for it would be from 0 to 16777215 (2^24 -1).  However, one
cannot create a geneve device with VNI set to 16777215. This patch fixes
this issue.

Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index de8156c6b292..2bbda71818ad 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1091,7 +1091,7 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
 	if (data[IFLA_GENEVE_ID]) {
 		__u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
 
-		if (vni >= GENEVE_VID_MASK)
+		if (vni >= GENEVE_N_VID)
 			return -ERANGE;
 	}
 

From 1714020e42b17135032c8606f7185b3fb2ba5d78 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 9 Aug 2017 14:38:04 +0300
Subject: [PATCH 230/281] igmp: Fix regression caused by igmp sysctl namespace
 code.

Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This
function is only called as part of per-namespace initialization, only if
CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is
compiled out, casuing the igmp pernet ops to not be registerd and those sysctl
being left initialized with 0. However, there are certain functions, such as
ip_mc_join_group which are always compiled and make use of some of those
sysctls. Let's do a partial revert of the aforementioned commit and move the
sysctl initialization into inet_init_net, that way they will always have
sane values.

Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595
Reported-by: Gerardo Exequiel Pozzi <vmlinuz386@gmail.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 7 +++++++
 net/ipv4/igmp.c    | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76c2077c3f5b..2e548eca3489 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net)
 	net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
 #endif
 
+	/* Some igmp sysctl, whose values are always used */
+	net->ipv4.sysctl_igmp_max_memberships = 20;
+	net->ipv4.sysctl_igmp_max_msf = 10;
+	/* IGMP reports for link-local multicast groups are enabled by default */
+	net->ipv4.sysctl_igmp_llm_reports = 1;
+	net->ipv4.sysctl_igmp_qrv = 2;
+
 	return 0;
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28f14afd0dd3..498706b072fb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net)
 		goto out_sock;
 	}
 
-	/* Sysctl initialization */
-	net->ipv4.sysctl_igmp_max_memberships = 20;
-	net->ipv4.sysctl_igmp_max_msf = 10;
-	/* IGMP reports for link-local multicast groups are enabled by default */
-	net->ipv4.sysctl_igmp_llm_reports = 1;
-	net->ipv4.sysctl_igmp_qrv = 2;
 	return 0;
 
 out_sock:

From 96d9703050a0036a3360ec98bb41e107c90664fe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 9 Aug 2017 18:15:19 +0800
Subject: [PATCH 231/281] net: sched: set xt_tgchk_param par.nft_compat as 0 in
 ipt_init_target

Commit 55917a21d0cc ("netfilter: x_tables: add context to know if
extension runs from nft_compat") introduced a member nft_compat to
xt_tgchk_param structure.

But it didn't set it's value for ipt_init_target. With unexpected
value in par.nft_compat, it may return unexpected result in some
target's checkentry.

This patch is to set all it's fields as 0 and only initialize the
non-zero fields in ipt_init_target.

v1->v2:
  As Wang Cong's suggestion, fix it by setting all it's fields as
  0 and only initializing the non-zero fields.

Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 94ba5cfab860..d516ba8178b8 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -49,9 +49,9 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
 		return PTR_ERR(target);
 
 	t->u.kernel.target = target;
+	memset(&par, 0, sizeof(par));
 	par.net       = net;
 	par.table     = table;
-	par.entryinfo = NULL;
 	par.target    = target;
 	par.targinfo  = t->data;
 	par.hook_mask = hook;

From 758f3735580c21b8a36d644128af6608120a1dde Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Thu, 20 Jul 2017 18:34:02 +0200
Subject: [PATCH 232/281] nvme: strip trailing 0-bytes in wwid_show

Some broken controllers (such as earlier Linux targets) pad model or
serial fields with 0-bytes rather than spaces. The NVMe spec disallows
0 bytes in "ASCII" fields.  Thus strip trailing 0-bytes, too. Also make
sure that we get no underflow for pathological input.

Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c49f1f8b2e57..1e9290983694 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2004,9 +2004,11 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 	if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
 		return sprintf(buf, "eui.%8phN\n", ns->eui);
 
-	while (ctrl->serial[serial_len - 1] == ' ')
+	while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' ||
+				  ctrl->serial[serial_len - 1] == '\0'))
 		serial_len--;
-	while (ctrl->model[model_len - 1] == ' ')
+	while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' ||
+				 ctrl->model[model_len - 1] == '\0'))
 		model_len--;
 
 	return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,

From 0fb228d30b8d72bfee51f57e638d412324d44a11 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 1 Aug 2017 15:12:39 -0700
Subject: [PATCH 233/281] nvmet_fc: add defer_req callback for deferment of cmd
 buffer return

At queue creation, the transport allocates a local job struct
(struct nvmet_fc_fcp_iod) for each possible element of the queue.
When a new CMD is received from the wire, a jobs struct is allocated
from the queue and then used for the duration of the command.
The job struct contains buffer space for the wire command iu. Thus,
upon allocation of the job struct, the cmd iu buffer is copied to
the job struct and the LLDD may immediately free/reuse the CMD IU
buffer passed in the call.

However, in some circumstances, due to the packetized nature of FC
and the api of the FC LLDD which may issue a hw command to send the
wire response, but the LLDD may not get the hw completion for the
command and upcall the nvmet_fc layer before a new command may be
asynchronously received on the wire. In other words, its possible
for the initiator to get the response from the wire, thus believe a
command slot free, and send a new command iu. The new command iu
may be received by the LLDD and passed to the transport before the
LLDD had serviced the hw completion and made the teardown calls for
the original job struct. As such, there is no available job struct
available for the new io. E.g. it appears like the host sent more
queue elements than the queue size. It didn't based on it's
understanding.

Rather than treat this as a hard connection failure queue the new
request until the job struct does free up. As the buffer isn't
copied as there's no job struct, a special return value must be
returned to the LLDD to signify to hold off on recycling the cmd
iu buffer.  And later, when a job struct is allocated and the
buffer copied, a new LLDD callback is introduced to notify the
LLDD and allow it to recycle it's command iu buffer.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c       | 216 ++++++++++++++++++++++++++++-----
 include/linux/nvme-fc-driver.h |   7 ++
 2 files changed, 193 insertions(+), 30 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 31ca55dfcb1d..1b7f2520a20d 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -114,6 +114,11 @@ struct nvmet_fc_tgtport {
 	struct kref			ref;
 };
 
+struct nvmet_fc_defer_fcp_req {
+	struct list_head		req_list;
+	struct nvmefc_tgt_fcp_req	*fcp_req;
+};
+
 struct nvmet_fc_tgt_queue {
 	bool				ninetypercent;
 	u16				qid;
@@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_fc_tgt_assoc	*assoc;
 	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
+	struct list_head		pending_cmd_list;
+	struct list_head		avail_defer_list;
 	struct workqueue_struct		*work_q;
 	struct kref			ref;
 } __aligned(sizeof(unsigned long long));
@@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
 static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
 static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
+					struct nvmet_fc_fcp_iod *fod);
 
 
 /* *********************** FC-NVME DMA Handling **************************** */
@@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod *
 nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 {
 	static struct nvmet_fc_fcp_iod *fod;
-	unsigned long flags;
 
-	spin_lock_irqsave(&queue->qlock, flags);
+	lockdep_assert_held(&queue->qlock);
+
 	fod = list_first_entry_or_null(&queue->fod_list,
 					struct nvmet_fc_fcp_iod, fcp_list);
 	if (fod) {
@@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 		 * will "inherit" that reference.
 		 */
 	}
-	spin_unlock_irqrestore(&queue->qlock, flags);
 	return fod;
 }
 
 
+static void
+nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
+		       struct nvmet_fc_tgt_queue *queue,
+		       struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+
+	/*
+	 * put all admin cmds on hw queue id 0. All io commands go to
+	 * the respective hw queue based on a modulo basis
+	 */
+	fcpreq->hwqid = queue->qid ?
+			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
+
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+		queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	else
+		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+}
+
 static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
 	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 
 	fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
@@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 
 	fcpreq->nvmet_fc_private = NULL;
 
-	spin_lock_irqsave(&queue->qlock, flags);
-	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
 	fod->abort = false;
 	fod->aborted = false;
 	fod->writedataactive = false;
 	fod->fcpreq = NULL;
-	spin_unlock_irqrestore(&queue->qlock, flags);
-
-	/*
-	 * release the reference taken at queue lookup and fod allocation
-	 */
-	nvmet_fc_tgt_q_put(queue);
 
 	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
+
+	spin_lock_irqsave(&queue->qlock, flags);
+	deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+	if (!deferfcp) {
+		list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		/* Release reference taken at queue lookup and fod allocation */
+		nvmet_fc_tgt_q_put(queue);
+		return;
+	}
+
+	/* Re-use the fod for the next pending cmd that was deferred */
+	list_del(&deferfcp->req_list);
+
+	fcpreq = deferfcp->fcp_req;
+
+	/* deferfcp can be reused for another IO at a later date */
+	list_add_tail(&deferfcp->req_list, &queue->avail_defer_list);
+
+	spin_unlock_irqrestore(&queue->qlock, flags);
+
+	/* Save NVME CMD IO in fod */
+	memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen);
+
+	/* Setup new fcpreq to be processed */
+	fcpreq->rspaddr = NULL;
+	fcpreq->rsplen  = 0;
+	fcpreq->nvmet_fc_private = fod;
+	fod->fcpreq = fcpreq;
+	fod->active = true;
+
+	/* inform LLDD IO is now being processed */
+	tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
+
+	/* Submit deferred IO for processing */
+	nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+	/*
+	 * Leave the queue lookup get reference taken when
+	 * fod was originally allocated.
+	 */
 }
 
 static int
@@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	queue->port = assoc->tgtport->port;
 	queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
 	INIT_LIST_HEAD(&queue->fod_list);
+	INIT_LIST_HEAD(&queue->avail_defer_list);
+	INIT_LIST_HEAD(&queue->pending_cmd_list);
 	atomic_set(&queue->connected, 0);
 	atomic_set(&queue->sqtail, 0);
 	atomic_set(&queue->rsn, 1);
@@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
 	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 	int i, writedataactive;
 	bool disconnect;
@@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 			}
 		}
 	}
+
+	/* Cleanup defer'ed IOs in queue */
+	list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) {
+		list_del(&deferfcp->req_list);
+		kfree(deferfcp);
+	}
+
+	for (;;) {
+		deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+		if (!deferfcp)
+			break;
+
+		list_del(&deferfcp->req_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		tgtport->ops->defer_rcv(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_abort(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		kfree(deferfcp);
+
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
 	flush_workqueue(queue->work_q);
@@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
  * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc
  * layer for processing.
  *
- * The nvmet-fc layer will copy cmd payload to an internal structure for
- * processing.  As such, upon completion of the routine, the LLDD may
- * immediately free/reuse the CMD IU buffer passed in the call.
+ * The nvmet_fc layer allocates a local job structure (struct
+ * nvmet_fc_fcp_iod) from the queue for the io and copies the
+ * CMD IU buffer to the job structure. As such, on a successful
+ * completion (returns 0), the LLDD may immediately free/reuse
+ * the CMD IU buffer passed in the call.
  *
- * If this routine returns error, the lldd should abort the exchange.
+ * However, in some circumstances, due to the packetized nature of FC
+ * and the api of the FC LLDD which may issue a hw command to send the
+ * response, but the LLDD may not get the hw completion for that command
+ * and upcall the nvmet_fc layer before a new command may be
+ * asynchronously received - its possible for a command to be received
+ * before the LLDD and nvmet_fc have recycled the job structure. It gives
+ * the appearance of more commands received than fits in the sq.
+ * To alleviate this scenario, a temporary queue is maintained in the
+ * transport for pending LLDD requests waiting for a queue job structure.
+ * In these "overrun" cases, a temporary queue element is allocated
+ * the LLDD request and CMD iu buffer information remembered, and the
+ * routine returns a -EOVERFLOW status. Subsequently, when a queue job
+ * structure is freed, it is immediately reallocated for anything on the
+ * pending request list. The LLDDs defer_rcv() callback is called,
+ * informing the LLDD that it may reuse the CMD IU buffer, and the io
+ * is then started normally with the transport.
+ *
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat
+ * the completion as successful but must not reuse the CMD IU buffer
+ * until the LLDD's defer_rcv() callback has been called for the
+ * corresponding struct nvmefc_tgt_fcp_req pointer.
+ *
+ * If there is any other condition in which an error occurs, the
+ * transport will return a non-zero status indicating the error.
+ * In all cases other than -EOVERFLOW, the transport has not accepted the
+ * request and the LLDD should abort the exchange.
  *
  * @target_port: pointer to the (registered) target port the FCP CMD IU
  *              was received on.
@@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
 	struct nvmet_fc_tgt_queue *queue;
 	struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
+	unsigned long flags;
 
 	/* validate iu, so the connection id can be used to find the queue */
 	if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	 * when the fod is freed.
 	 */
 
+	spin_lock_irqsave(&queue->qlock, flags);
+
 	fod = nvmet_fc_alloc_fcp_iod(queue);
-	if (!fod) {
+	if (fod) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		fcpreq->nvmet_fc_private = fod;
+		fod->fcpreq = fcpreq;
+
+		memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+
+		nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+		return 0;
+	}
+
+	if (!tgtport->ops->defer_rcv) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 		/* release the queue lookup reference */
 		nvmet_fc_tgt_q_put(queue);
 		return -ENOENT;
 	}
 
-	fcpreq->nvmet_fc_private = fod;
-	fod->fcpreq = fcpreq;
-	/*
-	 * put all admin cmds on hw queue id 0. All io commands go to
-	 * the respective hw queue based on a modulo basis
-	 */
-	fcpreq->hwqid = queue->qid ?
-			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
-	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+	deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+			struct nvmet_fc_defer_fcp_req, req_list);
+	if (deferfcp) {
+		/* Just re-use one that was previously allocated */
+		list_del(&deferfcp->req_list);
+	} else {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 
-	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-		queue_work_on(queue->cpu, queue->work_q, &fod->work);
-	else
-		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+		/* Now we need to dynamically allocate one */
+		deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL);
+		if (!deferfcp) {
+			/* release the queue lookup reference */
+			nvmet_fc_tgt_q_put(queue);
+			return -ENOMEM;
+		}
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 
-	return 0;
+	/* For now, use rspaddr / rsplen to save payload information */
+	fcpreq->rspaddr = cmdiubuf;
+	fcpreq->rsplen  = cmdiubuf_len;
+	deferfcp->fcp_req = fcpreq;
+
+	/* defer processing till a fod becomes available */
+	list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list);
+
+	/* NOTE: the queue lookup reference is still valid */
+
+	spin_unlock_irqrestore(&queue->qlock, flags);
+
+	return -EOVERFLOW;
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
 
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 6c8c5d8041b7..2591878c1d48 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,6 +346,11 @@ struct nvme_fc_remote_port {
  *       indicating an FC transport Aborted status.
  *       Entrypoint is Mandatory.
  *
+ * @defer_rcv:  Called by the transport to signal the LLLD that it has
+ *       begun processing of a previously received NVME CMD IU. The LLDD
+ *       is now free to re-use the rcv buffer associated with the
+ *       nvmefc_tgt_fcp_req.
+ *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
  *       Value is Mandatory. Must be at least 1.
@@ -846,6 +851,8 @@ struct nvmet_fc_target_template {
 				struct nvmefc_tgt_fcp_req *fcpreq);
 	void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport,
 				struct nvmefc_tgt_fcp_req *fcpreq);
+	void (*defer_rcv)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_fcp_req *fcpreq);
 
 	u32	max_hw_queues;
 	u16	max_sgl_segments;

From 507384209371fc25cab203b95e7bdf50e58b47d5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 1 Aug 2017 15:12:40 -0700
Subject: [PATCH 234/281] lpfc: support nvmet_fc defer_rcv callback

Currently, calls to nvmet_fc_rcv_fcp_req() always copied the
FC-NVME cmd iu to a temporary buffer before returning, allowing
the driver to immediately repost the buffer to the hardware.

To address timing conditions on queue element structures vs async
command reception, the nvmet_fc transport occasionally may need to
hold on to the command iu buffer for a short period. In these cases,
the nvmet_fc_rcv_fcp_req() will return a special return code
(-EOVERFLOW). In these cases, the LLDD must delay until the new
defer_rcv lldd callback is called before recycling the buffer back
to the hw.

This patch adds support for the new nvmet_fc transport defer_rcv
callback and recognition of the new error code when passing commands
to the transport.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/lpfc/lpfc_attr.c    |  4 +++-
 drivers/scsi/lpfc/lpfc_debugfs.c |  5 ++++-
 drivers/scsi/lpfc/lpfc_nvmet.c   | 30 ++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_nvmet.h   |  1 +
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 4ed48ed38e79..7ee1a94c0b33 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -205,8 +205,10 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
 		len += snprintf(buf+len, PAGE_SIZE-len,
-				"FCP: Rcv %08x Release %08x Drop %08x\n",
+				"FCP: Rcv %08x Defer %08x Release %08x "
+				"Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->rcv_fcp_cmd_defer),
 				atomic_read(&tgtp->xmt_fcp_release),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 5cc8b0f7d885..744f3f395b64 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -782,8 +782,11 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				atomic_read(&tgtp->xmt_ls_rsp_error));
 
 		len += snprintf(buf + len, size - len,
-				"FCP: Rcv %08x Drop %08x\n",
+				"FCP: Rcv %08x Defer %08x Release %08x "
+				"Drop %08x\n",
 				atomic_read(&tgtp->rcv_fcp_cmd_in),
+				atomic_read(&tgtp->rcv_fcp_cmd_defer),
+				atomic_read(&tgtp->xmt_fcp_release),
 				atomic_read(&tgtp->rcv_fcp_cmd_drop));
 
 		if (atomic_read(&tgtp->rcv_fcp_cmd_in) !=
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index fbeec344c6cc..bbbd0f84160d 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -841,12 +841,31 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
 	lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
 }
 
+static void
+lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
+		     struct nvmefc_tgt_fcp_req *rsp)
+{
+	struct lpfc_nvmet_tgtport *tgtp;
+	struct lpfc_nvmet_rcv_ctx *ctxp =
+		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+	struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
+	struct lpfc_hba *phba = ctxp->phba;
+
+	lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n",
+			 ctxp->oxid, ctxp->size, smp_processor_id());
+
+	tgtp = phba->targetport->private;
+	atomic_inc(&tgtp->rcv_fcp_cmd_defer);
+	lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */
+}
+
 static struct nvmet_fc_target_template lpfc_tgttemplate = {
 	.targetport_delete = lpfc_nvmet_targetport_delete,
 	.xmt_ls_rsp     = lpfc_nvmet_xmt_ls_rsp,
 	.fcp_op         = lpfc_nvmet_xmt_fcp_op,
 	.fcp_abort      = lpfc_nvmet_xmt_fcp_abort,
 	.fcp_req_release = lpfc_nvmet_xmt_fcp_release,
+	.defer_rcv	= lpfc_nvmet_defer_rcv,
 
 	.max_hw_queues  = 1,
 	.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
@@ -1504,6 +1523,17 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 		return;
 	}
 
+	/* Processing of FCP command is deferred */
+	if (rc == -EOVERFLOW) {
+		lpfc_nvmeio_data(phba,
+				 "NVMET RCV BUSY: xri x%x sz %d from %06x\n",
+				 oxid, size, sid);
+		/* defer reposting rcv buffer till .defer_rcv callback */
+		ctxp->rqb_buffer = nvmebuf;
+		atomic_inc(&tgtp->rcv_fcp_cmd_out);
+		return;
+	}
+
 	atomic_inc(&tgtp->rcv_fcp_cmd_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
 			"6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index e675ef17be08..48a76788b003 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -49,6 +49,7 @@ struct lpfc_nvmet_tgtport {
 	atomic_t rcv_fcp_cmd_in;
 	atomic_t rcv_fcp_cmd_out;
 	atomic_t rcv_fcp_cmd_drop;
+	atomic_t rcv_fcp_cmd_defer;
 	atomic_t xmt_fcp_release;
 
 	/* Stats counters - lpfc_nvmet_xmt_fcp_op */

From 1c78f7735b2bdd0afbe5d14c5c8b6d8d381b6f13 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Sun, 30 Jul 2017 01:45:08 +0300
Subject: [PATCH 235/281] nvme-pci: fix CMB sysfs file removal in reset path

Currently we create the sysfs entry even if we fail mapping
it. In that case, the unmapping will not remove the sysfs created
file. There is no good reason to create a sysfs entry for a non
working CMB and show his characteristics.

Fixes: f63572dff ("nvme: unmap CMB and remove sysfs file in reset path")
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cd888a47d0fc..74a124a06264 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1558,11 +1558,9 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
 	if (dev->cmb) {
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
-		if (dev->cmbsz) {
-			sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
-						     &dev_attr_cmb.attr, NULL);
-			dev->cmbsz = 0;
-		}
+		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
+					     &dev_attr_cmb.attr, NULL);
+		dev->cmbsz = 0;
 	}
 }
 
@@ -1953,16 +1951,14 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 	/*
 	 * CMBs can currently only exist on >=1.2 PCIe devices. We only
-	 * populate sysfs if a CMB is implemented. Note that we add the
-	 * CMB attribute to the nvme_ctrl kobj which removes the need to remove
-	 * it on exit. Since nvme_dev_attrs_group has no name we can pass
-	 * NULL as final argument to sysfs_add_file_to_group.
+	 * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group
+	 * has no name we can pass NULL as final argument to
+	 * sysfs_add_file_to_group.
 	 */
 
 	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
 		dev->cmb = nvme_map_cmb(dev);
-
-		if (dev->cmbsz) {
+		if (dev->cmb) {
 			if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 						    &dev_attr_cmb.attr, NULL))
 				dev_warn(dev->ctrl.device,

From 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 10 Aug 2017 12:29:19 -0400
Subject: [PATCH 236/281] udp: consistently apply ufo or fragmentation

When iteratively building a UDP datagram with MSG_MORE and that
datagram exceeds MTU, consistently choose UFO or fragmentation.

Once skb_is_gso, always apply ufo. Conversely, once a datagram is
split across multiple skbs, do not consider ufo.

Sendpage already maintains the first invariant, only add the second.
IPv6 does not have a sendpage implementation to modify.

A gso skb must have a partial checksum, do not follow sk_no_check_tx
in udp_send_skb.

Found by syzkaller.

Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 8 +++++---
 net/ipv4/udp.c        | 2 +-
 net/ipv6/ip6_output.c | 7 ++++---
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50c74cd890bc..e153c40c2436 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
+	if ((skb && skb_is_gso(skb)) ||
+	    (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
+	    (skb_queue_len(queue) <= 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
 					 maxfraglen, flags);
@@ -1288,6 +1289,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 		return -EINVAL;
 
 	if ((size + skb->len > mtu) &&
+	    (skb_queue_len(&sk->sk_write_queue) == 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e6276fa3750b..a7c804f73990 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 	if (is_udplite)  				 /*     UDP-Lite      */
 		csum = udplite_csum(skb);
 
-	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
+	else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
 
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 162efba0d0cd..2dfe50d8d609 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1381,11 +1381,12 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((((length + (skb ? skb->len : headersize)) > mtu) ||
-	     (skb && skb_is_gso(skb))) &&
+	if ((skb && skb_is_gso(skb)) ||
+	    (((length + (skb ? skb->len : headersize)) > mtu) &&
+	    (skb_queue_len(queue) <= 1) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
+	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
 		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
 					  hh_len, fragheaderlen, exthdrlen,
 					  transhdrlen, mtu, flags, fl6);

From c27927e372f0785f3303e8fad94b85945e2c97b7 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 10 Aug 2017 12:41:58 -0400
Subject: [PATCH 237/281] packet: fix tp_reserve race in packet_set_ring

Updates to tp_reserve can race with reads of the field in
packet_set_ring. Avoid this by holding the socket lock during
updates in setsockopt PACKET_RESERVE.

This bug was discovered by syzkaller.

Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0615c2a950fa..008a45ca3112 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3700,14 +3700,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 		if (val > INT_MAX)
 			return -EINVAL;
-		po->tp_reserve = val;
-		return 0;
+		lock_sock(sk);
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+			ret = -EBUSY;
+		} else {
+			po->tp_reserve = val;
+			ret = 0;
+		}
+		release_sock(sk);
+		return ret;
 	}
 	case PACKET_LOSS:
 	{

From 634b8325905031eeafa61951623681ebc1329385 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 10 Aug 2017 11:23:31 +0200
Subject: [PATCH 238/281] nvme: fix nvme reset command timeout handling

We need to return an error if a timeout occurs on any NVMe command during
initialization. Without this, the nvme reset work will be stuck. A timeout
will have a negative error code, meaning we need to stop initializing
the controller. All postitive returns mean the controller is still usable.

bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196325

Signed-off-by: Keith Busch <keith.busch@intel.com>
Cc: Martin Peres <martin.peres@intel.com>
[jth consolidated cleanup path ]
Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e9290983694..6212cf4e9829 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1509,7 +1509,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	blk_queue_write_cache(q, vwc, vwc);
 }
 
-static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+static int nvme_configure_apst(struct nvme_ctrl *ctrl)
 {
 	/*
 	 * APST (Autonomous Power State Transition) lets us program a
@@ -1538,16 +1538,16 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * then don't do anything.
 	 */
 	if (!ctrl->apsta)
-		return;
+		return 0;
 
 	if (ctrl->npss > 31) {
 		dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
-		return;
+		return 0;
 	}
 
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
-		return;
+		return 0;
 
 	if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
@@ -1629,6 +1629,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 		dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
 
 	kfree(table);
+	return ret;
 }
 
 static void nvme_set_latency_tolerance(struct device *dev, s32 val)
@@ -1835,13 +1836,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		 * In fabrics we need to verify the cntlid matches the
 		 * admin connect
 		 */
-		if (ctrl->cntlid != le16_to_cpu(id->cntlid))
+		if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
 			ret = -EINVAL;
+			goto out_free;
+		}
 
 		if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
 			dev_err(ctrl->device,
 				"keep-alive support is mandatory for fabrics\n");
 			ret = -EINVAL;
+			goto out_free;
 		}
 	} else {
 		ctrl->cntlid = le16_to_cpu(id->cntlid);
@@ -1856,11 +1860,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
-	nvme_configure_apst(ctrl);
-	nvme_configure_directives(ctrl);
+	ret = nvme_configure_apst(ctrl);
+	if (ret < 0)
+		return ret;
+
+	ret = nvme_configure_directives(ctrl);
+	if (ret < 0)
+		return ret;
 
 	ctrl->identified = true;
 
+	return 0;
+
+out_free:
+	kfree(id);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_identify);

From a082b426286d1ead97fb87646ea361d528be023d Mon Sep 17 00:00:00 2001
From: "Kwan (Hingkwan) Huen-SSI" <kwan.huen@samsung.com>
Date: Wed, 9 Aug 2017 11:26:29 -0700
Subject: [PATCH 239/281] nvme: fix directive command numd calculation

The numd field of directive receive command takes number of dwords to
transfer. This fix has the correct calculation for numd.

Signed-off-by: Kwan (Hingkwan) Huen-SSI <kwan.huen@samsung.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 6212cf4e9829..37046ac2c441 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 
 	c.directive.opcode = nvme_admin_directive_recv;
 	c.directive.nsid = cpu_to_le32(nsid);
-	c.directive.numd = cpu_to_le32(sizeof(*s));
+	c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
 	c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
 	c.directive.dtype = NVME_DIR_STREAMS;
 

From e788787ef4f9c24aafefc480a8da5f92b914e5e6 Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep.singh@amd.com>
Date: Fri, 4 Aug 2017 16:35:56 +0530
Subject: [PATCH 240/281] usb:xhci:Add quirk for Certain failing HP keyboard on
 reset after resume

Certain HP keyboards would keep inputting a character automatically which
is the wake-up key after S3 resume

On some AMD platforms USB host fails to respond (by holding resume-K) to
USB device (an HP keyboard) resume request within 1ms (TURSM) and ensures
that resume is signaled for at least 20 ms (TDRSMDN), which is defined in
USB 2.0 spec. The result is that the keyboard is out of function.

In SNPS USB design, the host responds to the resume request only after
system gets back to S0 and the host gets to functional after the internal
HW restore operation that is more than 1 second after the initial resume
request from the USB device.

As a workaround for specific keyboard ID(HP Keyboards), applying port reset
after resume when the keyboard is plugged in.

Signed-off-by: Sandeep Singh <Sandeep.Singh@amd.com>
Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
cc: Nehal Shah <Nehal-bakulchandra.Shah@amd.com>
Reviewed-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c     |  1 +
 drivers/usb/host/pci-quirks.c | 17 ++++++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 3116edfcdc18..2e9cde0b25d7 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -249,6 +249,7 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = {
 	{ USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME },
 	{ USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME },
 	{ USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME },
+	{ USB_DEVICE(0x03f0, 0x2b4a), .driver_info = USB_QUIRK_RESET_RESUME },
 
 	/* Logitech Optical Mouse M90/M100 */
 	{ USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME },
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index c8989c62a262..5f4ca7890435 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -98,6 +98,7 @@ enum amd_chipset_gen {
 	AMD_CHIPSET_HUDSON2,
 	AMD_CHIPSET_BOLTON,
 	AMD_CHIPSET_YANGTZE,
+	AMD_CHIPSET_TAISHAN,
 	AMD_CHIPSET_UNKNOWN,
 };
 
@@ -141,6 +142,11 @@ static int amd_chipset_sb_type_init(struct amd_chipset_info *pinfo)
 			pinfo->sb_type.gen = AMD_CHIPSET_SB700;
 		else if (rev >= 0x40 && rev <= 0x4f)
 			pinfo->sb_type.gen = AMD_CHIPSET_SB800;
+	}
+	pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD,
+					  0x145c, NULL);
+	if (pinfo->smbus_dev) {
+		pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN;
 	} else {
 		pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD,
 				PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, NULL);
@@ -260,11 +266,12 @@ int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev)
 {
 	/* Make sure amd chipset type has already been initialized */
 	usb_amd_find_chipset_info();
-	if (amd_chipset.sb_type.gen != AMD_CHIPSET_YANGTZE)
-		return 0;
-
-	dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n");
-	return 1;
+	if (amd_chipset.sb_type.gen == AMD_CHIPSET_YANGTZE ||
+	    amd_chipset.sb_type.gen == AMD_CHIPSET_TAISHAN) {
+		dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n");
+		return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(usb_hcd_amd_remote_wakeup_quirk);
 

From 94c43b9897abf4ea366ed4dba027494e080c7050 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 1 Aug 2017 10:41:56 -0400
Subject: [PATCH 241/281] USB: Check for dropped connection before switching to
 full speed

Some buggy USB disk adapters disconnect and reconnect multiple times
during the enumeration procedure.  This may lead to a device
connecting at full speed instead of high speed, because when the USB
stack sees that a device isn't able to enumerate at high speed, it
tries to hand the connection over to a full-speed companion
controller.

The logic for doing this is careful to check that the device is still
connected.  But this check is inadequate if the device disconnects and
reconnects before the check is done.  The symptom is that a device
works, but much more slowly than it is capable of operating.

The situation was made worse recently by commit 22547c4cc4fe ("usb:
hub: Wait for connection to be reestablished after port reset"), which
increases the delay following a reset before a disconnect is
recognized, thus giving the device more time to reconnect.

This patch makes the check more robust.  If the device was
disconnected at any time during enumeration, we will now skip the
full-speed handover.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Zdenek Kabelac <zkabelac@redhat.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 6e6797d145dd..822f8c50e423 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4725,7 +4725,8 @@ hub_power_remaining(struct usb_hub *hub)
 static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 		u16 portchange)
 {
-	int status, i;
+	int status = -ENODEV;
+	int i;
 	unsigned unit_load;
 	struct usb_device *hdev = hub->hdev;
 	struct usb_hcd *hcd = bus_to_hcd(hdev->bus);
@@ -4929,9 +4930,10 @@ loop:
 
 done:
 	hub_port_disable(hub, port1, 1);
-	if (hcd->driver->relinquish_port && !hub->hdev->parent)
-		hcd->driver->relinquish_port(hcd, port1);
-
+	if (hcd->driver->relinquish_port && !hub->hdev->parent) {
+		if (status != -ENOTCONN && status != -ENODEV)
+			hcd->driver->relinquish_port(hcd, port1);
+	}
 }
 
 /* Handle physical or logical connection change events.

From 7496cfe5431f21da5d27a8388c326397e3f0a5db Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 8 Aug 2017 17:51:27 +0800
Subject: [PATCH 242/281] usb: quirks: Add no-lpm quirk for Moshi USB to
 Ethernet Adapter

Moshi USB to Ethernet Adapter internally uses a Genesys Logic hub to
connect to Realtek r8153.

The Realtek r8153 ethernet does not work on the internal hub, no-lpm quirk
can make it work.

Since another r8153 dongle at my hand does not have the issue, so add
the quirk to the Genesys Logic hub instead.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 2e9cde0b25d7..574da2b4529c 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -150,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* appletouch */
 	{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
+	{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* Avision AV600U */
 	{ USB_DEVICE(0x0638, 0x0a13), .driver_info =
 	  USB_QUIRK_STRING_FETCH_255 },

From 3b6bcd3d093c698d32e93d4da57679b8fbc5e01e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 10 Aug 2017 11:54:12 -0700
Subject: [PATCH 243/281] USB: serial: pl2303: add new ATEN device id

This adds a new ATEN device id for a new pl2303-based device.

Reported-by: Peter Kuo <PeterKuo@aten.com.tw>
Cc: stable <stable@vger.kernel.org>
Cc: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/pl2303.c | 2 ++
 drivers/usb/serial/pl2303.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index c9ebefd8f35f..a585b477415d 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -52,6 +52,8 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) },
 	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID),
 		.driver_info = PL2303_QUIRK_ENDPOINT_HACK },
+	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485),
+		.driver_info = PL2303_QUIRK_ENDPOINT_HACK },
 	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) },
 	{ USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) },
 	{ USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 09d9be88209e..3b5a15d1dc0d 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -27,6 +27,7 @@
 #define ATEN_VENDOR_ID		0x0557
 #define ATEN_VENDOR_ID2		0x0547
 #define ATEN_PRODUCT_ID		0x2008
+#define ATEN_PRODUCT_UC485	0x2021
 #define ATEN_PRODUCT_ID2	0x2118
 
 #define IODATA_VENDOR_ID	0x04bb

From e44565f62a72064e686f7a852137595ec94d78f2 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 20 Jul 2017 13:13:09 -0700
Subject: [PATCH 244/281] firmware: fix batched requests - wake all waiters

The firmware cache mechanism serves two purposes, the secondary purpose is
not well documented nor understood. This fixes a regression with the
secondary purpose of the firmware cache mechanism: batched requests on
successful lookups. Without this fix *any* time a batched request is
triggered, secondary requests for which the batched request mechanism
was designed for will seem to last forver and seem to never return.
This issue is present for all kernel builds possible, and a hard reset
is required.

The firmware cache is used for:

1) Addressing races with file lookups during the suspend/resume cycle
   by keeping firmware in memory during the suspend/resume cycle

2) Batched requests for the same file rely only on work from the first file
   lookup, which keeps the firmware in memory until the last
   release_firmware() is called

Batched requests *only* take effect if secondary requests come in prior to
the first user calling release_firmware(). The devres name used for the
internal firmware cache is used as a hint other pending requests are
ongoing, the firmware buffer data is kept in memory until the last user of
the buffer calls release_firmware(), therefore serializing requests and
delaying the release until all requests are done.

Batched requests wait for a wakup or signal so we can rely on the first file
fetch to write to the pending secondary requests. Commit 5b029624948d
("firmware: do not use fw_lock for fw_state protection") ported the firmware
API to use swait, and in doing so failed to convert complete_all() to
swake_up_all() -- it used swake_up(), loosing the ability for *some* batched
requests to take effect.

We *could* fix this by just using swake_up_all() *but* swait is now known
to be very special use case, so its best to just move away from it. So we
just go back to using completions as before commit 5b029624948d ("firmware:
do not use fw_lock for fw_state protection") given this was using
complete_all().

Without this fix it has been reported plugging in two Intel 6260 Wifi cards
on a system will end up enumerating the two devices only 50% of the time
[0]. The ported swake_up() should have actually handled the case with two
devices, however, *if more than two cards are used* the swake_up() would
not have sufficed. This change is only part of the required fixes for
batched requests. Another fix is provided in the next patch.

This particular change should fix the cases where more than three requests
with the same firmware name is used, otherwise batched requests will wait
for MAX_SCHEDULE_TIMEOUT and just timeout eventually.

Below is a summary of tests triggering batched requests on different
kernel builds.

Before this patch:
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=y

Most common Linux distribution setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                FAIL
request_firmware_direct()              FAIL                FAIL
request_firmware_nowait(uevent=true)   FAIL                FAIL
request_firmware_nowait(uevent=false)  FAIL                FAIL
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=n

Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                FAIL
request_firmware_direct()              FAIL                FAIL
request_firmware_nowait(uevent=true)   FAIL                FAIL
request_firmware_nowait(uevent=false)  FAIL                FAIL
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
CONFIG_FW_LOADER_USER_HELPER=y

Google Android setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                FAIL
request_firmware_direct()              FAIL                FAIL
request_firmware_nowait(uevent=true)   FAIL                FAIL
request_firmware_nowait(uevent=false)  FAIL                FAIL
============================================================================

After this patch:
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=y

Most common Linux distribution setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   FAIL                OK
request_firmware_nowait(uevent=false)  FAIL                OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=n

Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   FAIL                OK
request_firmware_nowait(uevent=false)  FAIL                OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
CONFIG_FW_LOADER_USER_HELPER=y

Google Android setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     OK                  OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   OK                  OK
request_firmware_nowait(uevent=false)  OK                  OK
============================================================================

[0] https://bugzilla.kernel.org/show_bug.cgi?id=195477

CC: <stable@vger.kernel.org>    [4.10+]
Cc: Ming Lei <ming.lei@redhat.com>
Fixes: 5b029624948d ("firmware: do not use fw_lock for fw_state protection")
Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b9f907eedbf7..f50ec6e367bd 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -30,7 +30,6 @@
 #include <linux/syscore_ops.h>
 #include <linux/reboot.h>
 #include <linux/security.h>
-#include <linux/swait.h>
 
 #include <generated/utsrelease.h>
 
@@ -112,13 +111,13 @@ static inline long firmware_loading_timeout(void)
  * state of the firmware loading.
  */
 struct fw_state {
-	struct swait_queue_head wq;
+	struct completion completion;
 	enum fw_status status;
 };
 
 static void fw_state_init(struct fw_state *fw_st)
 {
-	init_swait_queue_head(&fw_st->wq);
+	init_completion(&fw_st->completion);
 	fw_st->status = FW_STATUS_UNKNOWN;
 }
 
@@ -131,9 +130,8 @@ static int __fw_state_wait_common(struct fw_state *fw_st, long timeout)
 {
 	long ret;
 
-	ret = swait_event_interruptible_timeout(fw_st->wq,
-				__fw_state_is_done(READ_ONCE(fw_st->status)),
-				timeout);
+	ret = wait_for_completion_interruptible_timeout(&fw_st->completion,
+							timeout);
 	if (ret != 0 && fw_st->status == FW_STATUS_ABORTED)
 		return -ENOENT;
 	if (!ret)
@@ -148,7 +146,7 @@ static void __fw_state_set(struct fw_state *fw_st,
 	WRITE_ONCE(fw_st->status, status);
 
 	if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED)
-		swake_up(&fw_st->wq);
+		complete_all(&fw_st->completion);
 }
 
 #define fw_state_start(fw_st)					\

From 90d41e74a9c36a84e2efbd2a5b8d79299feee6fa Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 20 Jul 2017 13:13:10 -0700
Subject: [PATCH 245/281] firmware: fix batched requests - send wake up on
 failure on direct lookups

Fix batched requests from waiting forever on failure.

The firmware API batched requests feature has been broken since the API call
request_firmware_direct() was introduced on commit bba3a87e982ad ("firmware:
Introduce request_firmware_direct()"), added on v3.14 *iff* the firmware
being requested was not present in *certain kernel builds* [0].

When no firmware is found the worker which goes on to finish never informs
waiters queued up of this, so any batched request will stall in what seems
to be forever (MAX_SCHEDULE_TIMEOUT). Sadly, a reboot will also stall, as
the reboot notifier was only designed to kill custom fallback workers. The
issue seems to the user as a type of soft lockup, what *actually* happens
underneath the hood is a wait call which never completes as we failed to
issue a completion on error.

For device drivers with optional firmware schemes (ie, Intel iwlwifi, or
Netronome -- even though it uses request_firmware() and not
request_firmware_direct()), this could mean that when you boot a system with
multiple cards the firmware will seem to never load on the system, or that
the card is just not responsive even the driver initialization. Due to
differences in scheduling possible this should not always trigger --
one would need to to ensure that multiple requests are in place at the
right time for this to work, also release_firmware() must not be called
prior to any other incoming request. The complexity may not be worth
supporting batched requests in the future given the wait mechanism is
only used also for the fallback mechanism. We'll keep it for now and
just fix it.

Its reported that at least with the Intel WiFi cards on one system this
issue was creeping up 50% of the boots [0].

Before this commit batched requests testing revealed:
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=y

Most common Linux distribution setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   FAIL                OK
request_firmware_nowait(uevent=false)  FAIL                OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=n

Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     FAIL                OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   FAIL                OK
request_firmware_nowait(uevent=false)  FAIL                OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
CONFIG_FW_LOADER_USER_HELPER=y

Google Android setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     OK                  OK
request_firmware_direct()              FAIL                OK
request_firmware_nowait(uevent=true)   OK                  OK
request_firmware_nowait(uevent=false)  OK                  OK
============================================================================

Ater this commit batched testing results:
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=y

Most common Linux distribution setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     OK                  OK
request_firmware_direct()              OK                  OK
request_firmware_nowait(uevent=true)   OK                  OK
request_firmware_nowait(uevent=false)  OK                  OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n
CONFIG_FW_LOADER_USER_HELPER=n

Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     OK                  OK
request_firmware_direct()              OK                  OK
request_firmware_nowait(uevent=true)   OK                  OK
request_firmware_nowait(uevent=false)  OK                  OK
============================================================================
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
CONFIG_FW_LOADER_USER_HELPER=y

Google Android setup.

API-type                               no-firmware-found   firmware-found
----------------------------------------------------------------------
request_firmware()                     OK                  OK
request_firmware_direct()              OK                  OK
request_firmware_nowait(uevent=true)   OK                  OK
request_firmware_nowait(uevent=false)  OK                  OK
============================================================================

[0] https://bugzilla.kernel.org/show_bug.cgi?id=195477

Cc: stable <stable@vger.kernel.org> # v3.14
Fixes: bba3a87e982ad ("firmware: Introduce request_firmware_direct()"
Reported-by: Nicolas <nbroeking@me.com>
Reported-by: John Ewalt  <jewalt@lgsinnovations.com>
Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 38 +++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index f50ec6e367bd..76f1b702bdd6 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -153,28 +153,27 @@ static void __fw_state_set(struct fw_state *fw_st,
 	__fw_state_set(fw_st, FW_STATUS_LOADING)
 #define fw_state_done(fw_st)					\
 	__fw_state_set(fw_st, FW_STATUS_DONE)
+#define fw_state_aborted(fw_st)					\
+	__fw_state_set(fw_st, FW_STATUS_ABORTED)
 #define fw_state_wait(fw_st)					\
 	__fw_state_wait_common(fw_st, MAX_SCHEDULE_TIMEOUT)
 
-#ifndef CONFIG_FW_LOADER_USER_HELPER
-
-#define fw_state_is_aborted(fw_st)	false
-
-#else /* CONFIG_FW_LOADER_USER_HELPER */
-
 static int __fw_state_check(struct fw_state *fw_st, enum fw_status status)
 {
 	return fw_st->status == status;
 }
 
+#define fw_state_is_aborted(fw_st)				\
+	__fw_state_check(fw_st, FW_STATUS_ABORTED)
+
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+
 #define fw_state_aborted(fw_st)					\
 	__fw_state_set(fw_st, FW_STATUS_ABORTED)
 #define fw_state_is_done(fw_st)					\
 	__fw_state_check(fw_st, FW_STATUS_DONE)
 #define fw_state_is_loading(fw_st)				\
 	__fw_state_check(fw_st, FW_STATUS_LOADING)
-#define fw_state_is_aborted(fw_st)				\
-	__fw_state_check(fw_st, FW_STATUS_ABORTED)
 #define fw_state_wait_timeout(fw_st, timeout)			\
 	__fw_state_wait_common(fw_st, timeout)
 
@@ -1198,6 +1197,28 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name,
 	return 1; /* need to load */
 }
 
+/*
+ * Batched requests need only one wake, we need to do this step last due to the
+ * fallback mechanism. The buf is protected with kref_get(), and it won't be
+ * released until the last user calls release_firmware().
+ *
+ * Failed batched requests are possible as well, in such cases we just share
+ * the struct firmware_buf and won't release it until all requests are woken
+ * and have gone through this same path.
+ */
+static void fw_abort_batch_reqs(struct firmware *fw)
+{
+	struct firmware_buf *buf;
+
+	/* Loaded directly? */
+	if (!fw || !fw->priv)
+		return;
+
+	buf = fw->priv;
+	if (!fw_state_is_aborted(&buf->fw_st))
+		fw_state_aborted(&buf->fw_st);
+}
+
 /* called from request_firmware() and request_firmware_work_func() */
 static int
 _request_firmware(const struct firmware **firmware_p, const char *name,
@@ -1241,6 +1262,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 
  out:
 	if (ret < 0) {
+		fw_abort_batch_reqs(fw);
 		release_firmware(fw);
 		fw = NULL;
 	}

From 260d9f2fc5655a2552701cdd0b909c4466732f19 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 20 Jul 2017 13:13:11 -0700
Subject: [PATCH 246/281] firmware: avoid invalid fallback aborts by using
 killable wait

Commit 0cb64249ca500 ("firmware_loader: abort request if wait_for_completion
is interrupted") added via 4.0 added support to abort the fallback mechanism
when a signal was detected and wait_for_completion_interruptible() returned
-ERESTARTSYS -- for instance when a user hits CTRL-C. The abort was overly
*too* effective.

When a child process terminates (successful or not) the signal SIGCHLD can
be sent to the parent process which ran the child in the background and
later triggered a sync request for firmware through a sysfs interface which
relies on the fallback mechanism. This signal in turn can be recieved by the
interruptible wait we constructed on firmware_class and detects it as an
abort *before* userspace could get a chance to write the firmware. Upon
failure -EAGAIN is returned, so userspace is also kept in the dark about
exactly what happened.

We can reproduce the issue with the fw_fallback.sh selftest:

Before this patch:
$ sudo tools/testing/selftests/firmware/fw_fallback.sh
...
tools/testing/selftests/firmware/fw_fallback.sh: error - sync firmware request cancelled due to SIGCHLD

After this patch:
$ sudo tools/testing/selftests/firmware/fw_fallback.sh
...
tools/testing/selftests/firmware/fw_fallback.sh: SIGCHLD on sync ignored as expected

Fix this by making the wait killable -- only killable by SIGKILL (kill -9).
We loose the ability to allow userspace to cancel a write with CTRL-C
(SIGINT), however its been decided the compromise to require SIGKILL is
worth the gains.

Chances of this issue occuring are low due to the number of drivers upstream
exclusively relying on the fallback mechanism for firmware (2 drivers),
however this is observed in the field with custom drivers with sysfs
triggers to load firmware. Only distributions relying on the fallback
mechanism are impacted as well. An example reported issue was on Android,
as follows:

1) Android init (pid=1) fork()s (say pid=42) [this child process is totally
   unrelated to firmware loading, it could be sleep 2; for all we care ]
2) Android init (pid=1) does a write() on a (driver custom) sysfs file which
   ends up calling request_firmware() kernel side
3) The firmware loading fallback mechanism is used, the request is sent to
   userspace and pid 1 waits in the kernel on wait_*
4) before firmware loading completes pid 42 dies (for any reason, even
   normal termination)
5) Kernel delivers SIGCHLD to pid=1 to tell it a child has died, which
   causes -ERESTARTSYS to be returned from wait_*
6) The kernel's wait aborts and return -EAGAIN for the
   request_firmware() caller.

Cc: stable <stable@vger.kernel.org> # 4.0
Fixes: 0cb64249ca500 ("firmware_loader: abort request if wait_for_completion is interrupted")
Suggested-by: "Eric W. Biederman" <ebiederm@xmission.com>
Suggested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Tested-by: Martin Fuzzey <mfuzzey@parkeon.com>
Reported-by: Martin Fuzzey <mfuzzey@parkeon.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 76f1b702bdd6..bfbe1e154128 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -130,8 +130,7 @@ static int __fw_state_wait_common(struct fw_state *fw_st, long timeout)
 {
 	long ret;
 
-	ret = wait_for_completion_interruptible_timeout(&fw_st->completion,
-							timeout);
+	ret = wait_for_completion_killable_timeout(&fw_st->completion, timeout);
 	if (ret != 0 && fw_st->status == FW_STATUS_ABORTED)
 		return -ENOENT;
 	if (!ret)

From 557909e195aea23d9e6b29cef0be3d795f4cf675 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Thu, 3 Aug 2017 17:30:19 +0300
Subject: [PATCH 247/281] mei: exclude device from suspend direct complete
 optimization

MEI device performs link reset during system suspend sequence.
The link reset cannot be performed while device is in
runtime suspend state. The resume sequence is bypassed with
suspend direct complete optimization,so the optimization should be
disabled for mei devices.

Fixes:
 [  192.940537] Restarting tasks ...
 [  192.940610] PGI is not set
 [  192.940619] ------------[ cut here ]------------ [  192.940623]
 WARNING: CPU: 0
 me.c:653 mei_me_pg_exit_sync+0x351/0x360 [  192.940624] Modules
 linked
 in:
 [  192.940627] CPU: 0 PID: 1661 Comm: kworker/0:3 Not tainted
 4.13.0-rc2+
 #2 [  192.940628] Hardware name: Dell Inc. XPS 13 9343/0TM99H, BIOS
 A11
 12/08/2016 [  192.940630] Workqueue: pm pm_runtime_work <snip> [
 192.940642] Call Trace:
 [  192.940646]  ? pci_pme_active+0x1de/0x1f0 [  192.940649]  ?
 pci_restore_standard_config+0x50/0x50
 [  192.940651]  ? kfree+0x172/0x190
 [  192.940653]  ? kfree+0x172/0x190
 [  192.940655]  ? pci_restore_standard_config+0x50/0x50
 [  192.940663]  mei_me_pm_runtime_resume+0x3f/0xc0
 [  192.940665]  pci_pm_runtime_resume+0x7a/0xa0 [  192.940667]
 __rpm_callback+0xb9/0x1e0 [  192.940668]  ?
 preempt_count_add+0x6d/0xc0 [  192.940670]  rpm_callback+0x24/0x90 [
 192.940672]  ? pci_restore_standard_config+0x50/0x50
 [  192.940674]  rpm_resume+0x4e8/0x800 [  192.940676]
 pm_runtime_work+0x55/0xb0 [  192.940678]
 process_one_work+0x184/0x3e0 [  192.940680]
 worker_thread+0x4d/0x3a0 [ 192.940681]  ?
 preempt_count_sub+0x9b/0x100 [  192.940683]
 kthread+0x122/0x140 [  192.940684]  ? process_one_work+0x3e0/0x3e0 [
 192.940685]  ? __kthread_create_on_node+0x1a0/0x1a0
 [  192.940688]  ret_from_fork+0x27/0x40 [  192.940690] Code: 96 3a
 9e ff 48 8b 7d 98 e8 cd 21 58 00 83 bb bc 01 00 00
 04 0f 85 40 fe ff ff e9 41 fe ff ff 48 c7 c7 5f 04 99 96 e8 93 6b 9f
 ff <0f> ff e9 5d fd ff ff e8 33 fe 99 ff 0f 1f 00 0f 1f 44 00 00 55
 [  192.940719] ---[ end trace
 a86955597774ead8 ]--- [  192.942540] done.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/pci-me.c  | 6 ++++++
 drivers/misc/mei/pci-txe.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 8621a198a2ce..bac33311f55a 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -215,6 +215,12 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_drvdata(pdev, dev);
 
+	/*
+	 * MEI requires to resume from runtime suspend mode
+	 * in order to perform link reset flow upon system suspend.
+	 */
+	pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;
+
 	/*
 	* For not wake-able HW runtime pm framework
 	* can't be used on pci device level.
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
index f811cd524468..e38a5f144373 100644
--- a/drivers/misc/mei/pci-txe.c
+++ b/drivers/misc/mei/pci-txe.c
@@ -137,6 +137,12 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_drvdata(pdev, dev);
 
+	/*
+	 * MEI requires to resume from runtime suspend mode
+	 * in order to perform link reset flow upon system suspend.
+	 */
+	pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME;
+
 	/*
 	* For not wake-able HW runtime pm framework
 	* can't be used on pci device level.

From 1cd65d17612e8b64989f7af20213d4bb7a7f4d91 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 25 Jul 2017 17:41:58 +0300
Subject: [PATCH 248/281] thunderbolt: Do not enumerate more ports from DROM
 than the controller has

Some Alpine Ridge LP DROMs (there might be others) erroneusly list more
ports than the controller actually has. Most probably because DROM of
the full Dual/Single port Thunderbolt controller was reused for LP
version. The current DROM parser does not check the upper bound thus it
leads to crash when sw->ports[] is accessed over bounds:

 BUG: unable to handle kernel NULL pointer dereference at 00000000000002ec
 IP: tb_drom_read+0x383/0x890 [thunderbolt]
 PGD 0
 P4D 0
 Oops: 0000 [#1] SMP
 CPU: 3 PID: 12248 Comm: systemd-udevd Not tainted 4.13.0-rc1-next-20170719 #1
 Hardware name: LENOVO 20HF000YGE/20HF000YGE, BIOS N1WET32W (1.11 ) 05/23/2017
 task: ffff8a293e4bcd80 task.stack: ffffa698027a8000
 RIP: 0010:tb_drom_read+0x383/0x890 [thunderbolt]
 RSP: 0018:ffffa698027ab990 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: ffff8a2940af7800 RCX: 0000000000000000
 RDX: ffff8a2940ebb400 RSI: 0000000000000000 RDI: ffffa698027ab9a0
 RBP: ffffa698027ab9d0 R08: 0000000000000001 R09: 0000000000000002
 R10: ffff8a2940ebb5b0 R11: 0000000000000000 R12: ffff8a293bfa968c
 R13: 000000000000002c R14: 0000000000000056 R15: 0000000000000056
 FS:  00007f0a945a38c0(0000) GS:ffff8a2961580000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00000000000002ec CR3: 000000043e785000 CR4: 00000000003606e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  tb_switch_add+0x9d/0x730 [thunderbolt]
  ? tb_switch_alloc+0x3cd/0x4d0 [thunderbolt]
  icm_start+0x5a/0xa0 [thunderbolt]
  tb_domain_add+0xc3/0xf0 [thunderbolt]
  nhi_probe+0x19e/0x310 [thunderbolt]
  local_pci_probe+0x42/0xa0
  pci_device_probe+0x18d/0x1a0
  driver_probe_device+0x2ff/0x450
  __driver_attach+0xa4/0xe0
  ? driver_probe_device+0x450/0x450
  bus_for_each_dev+0x6e/0xb0
  driver_attach+0x1e/0x20
  bus_add_driver+0x1d0/0x270
  ? 0xffffffffc0bbb000
  driver_register+0x60/0xe0
  ? 0xffffffffc0bbb000
  __pci_register_driver+0x4c/0x50
  nhi_init+0x28/0x1000 [thunderbolt]
  do_one_initcall+0x50/0x190
  ? __vunmap+0x81/0xb0
  ? _cond_resched+0x1a/0x50
  ? kmem_cache_alloc_trace+0x15f/0x1c0
  ? do_init_module+0x27/0x1e9
  do_init_module+0x5f/0x1e9
  load_module+0x24e7/0x2a60
  ? vfs_read+0x115/0x130
  SYSC_finit_module+0xfc/0x120
  ? SYSC_finit_module+0xfc/0x120
  SyS_finit_module+0xe/0x10
  do_syscall_64+0x67/0x170
  entry_SYSCALL64_slow_path+0x25/0x25

Fix this by making sure we only enumerate DROM port entries the hardware
actually has.

Reported-by: Christian Kellner <ckellner@redhat.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Tested-by: Christian Kellner <ckellner@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/eeprom.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c
index 308b6e17c88a..fe2f00ceafc5 100644
--- a/drivers/thunderbolt/eeprom.c
+++ b/drivers/thunderbolt/eeprom.c
@@ -333,6 +333,15 @@ static int tb_drom_parse_entry_port(struct tb_switch *sw,
 	int res;
 	enum tb_port_type type;
 
+	/*
+	 * Some DROMs list more ports than the controller actually has
+	 * so we skip those but allow the parser to continue.
+	 */
+	if (header->index > sw->config.max_port_number) {
+		dev_info_once(&sw->dev, "ignoring unnecessary extra entries in DROM\n");
+		return 0;
+	}
+
 	port = &sw->ports[header->index];
 	port->disabled = header->port_disabled;
 	if (port->disabled)

From d507e2ebd2c7be9138e5cf5c0cb1931c90c42ab1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 10 Aug 2017 15:23:31 -0700
Subject: [PATCH 249/281] mm: fix global NR_SLAB_.*CLAIMABLE counter reads

As Tetsuo points out:
 "Commit 385386cff4c6 ("mm: vmstat: move slab statistics from zone to
  node counters") broke "Slab:" field of /proc/meminfo . It shows nearly
  0kB"

In addition to /proc/meminfo, this problem also affects the slab
counters OOM/allocation failure info dumps, can cause early -ENOMEM from
overcommit protection, and miscalculate image size requirements during
suspend-to-disk.

This is because the patch in question switched the slab counters from
the zone level to the node level, but forgot to update the global
accessor functions to read the aggregate node data instead of the
aggregate zone data.

Use global_node_page_state() to access the global slab counters.

Fixes: 385386cff4c6 ("mm: vmstat: move slab statistics from zone to node counters")
Link: http://lkml.kernel.org/r/20170801134256.5400-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Stefan Agner <stefan@agner.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c       | 8 ++++----
 kernel/power/snapshot.c | 2 +-
 mm/page_alloc.c         | 9 +++++----
 mm/util.c               | 2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8a428498d6b2..509a61668d90 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -106,13 +106,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_FILE_MAPPED));
 	show_val_kb(m, "Shmem:          ", i.sharedram);
 	show_val_kb(m, "Slab:           ",
-		    global_page_state(NR_SLAB_RECLAIMABLE) +
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE) +
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 
 	show_val_kb(m, "SReclaimable:   ",
-		    global_page_state(NR_SLAB_RECLAIMABLE));
+		    global_node_page_state(NR_SLAB_RECLAIMABLE));
 	show_val_kb(m, "SUnreclaim:     ",
-		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
 	seq_printf(m, "KernelStack:    %8lu kB\n",
 		   global_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 222317721c5a..0972a8e09d08 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1650,7 +1650,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
 {
 	unsigned long size;
 
-	size = global_page_state(NR_SLAB_RECLAIMABLE)
+	size = global_node_page_state(NR_SLAB_RECLAIMABLE)
 		+ global_node_page_state(NR_ACTIVE_ANON)
 		+ global_node_page_state(NR_INACTIVE_ANON)
 		+ global_node_page_state(NR_ACTIVE_FILE)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc32aa81f359..626a430e32d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4458,8 +4458,9 @@ long si_mem_available(void)
 	 * Part of the reclaimable slab consists of items that are in use,
 	 * and cannot be freed. Cap this estimate at the low watermark.
 	 */
-	available += global_page_state(NR_SLAB_RECLAIMABLE) -
-		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+	available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
+		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
+			 wmark_low);
 
 	if (available < 0)
 		available = 0;
@@ -4602,8 +4603,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		global_node_page_state(NR_FILE_DIRTY),
 		global_node_page_state(NR_WRITEBACK),
 		global_node_page_state(NR_UNSTABLE_NFS),
-		global_page_state(NR_SLAB_RECLAIMABLE),
-		global_page_state(NR_SLAB_UNRECLAIMABLE),
+		global_node_page_state(NR_SLAB_RECLAIMABLE),
+		global_node_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_node_page_state(NR_FILE_MAPPED),
 		global_node_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
diff --git a/mm/util.c b/mm/util.c
index 7b07ec852e01..9ecddf568fe3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -633,7 +633,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += global_page_state(NR_SLAB_RECLAIMABLE);
+		free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.

From 75dddef32514f7aa58930bde6a1263253bc3d4ba Mon Sep 17 00:00:00 2001
From: Jonathan Toppins <jtoppins@redhat.com>
Date: Thu, 10 Aug 2017 15:23:35 -0700
Subject: [PATCH 250/281] mm: ratelimit PFNs busy info message

The RDMA subsystem can generate several thousand of these messages per
second eventually leading to a kernel crash.  Ratelimit these messages
to prevent this crash.

Doug said:
 "I've been carrying a version of this for several kernel versions. I
  don't remember when they started, but we have one (and only one) class
  of machines: Dell PE R730xd, that generate these errors. When it
  happens, without a rate limit, we get rcu timeouts and kernel oopses.
  With the rate limit, we just get a lot of annoying kernel messages but
  the machine continues on, recovers, and eventually the memory
  operations all succeed"

And:
 "> Well... why are all these EBUSY's occurring? It sounds inefficient
  > (at least) but if it is expected, normal and unavoidable then
  > perhaps we should just remove that message altogether?

  I don't have an answer to that question. To be honest, I haven't
  looked real hard. We never had this at all, then it started out of the
  blue, but only on our Dell 730xd machines (and it hits all of them),
  but no other classes or brands of machines. And we have our 730xd
  machines loaded up with different brands and models of cards (for
  instance one dedicated to mlx4 hardware, one for qib, one for mlx5, an
  ocrdma/cxgb4 combo, etc), so the fact that it hit all of the machines
  meant it wasn't tied to any particular brand/model of RDMA hardware.
  To me, it always smelled of a hardware oddity specific to maybe the
  CPUs or mainboard chipsets in these machines, so given that I'm not an
  mm expert anyway, I never chased it down.

  A few other relevant details: it showed up somewhere around 4.8/4.9 or
  thereabouts. It never happened before, but the prinkt has been there
  since the 3.18 days, so possibly the test to trigger this message was
  changed, or something else in the allocator changed such that the
  situation started happening on these machines?

  And, like I said, it is specific to our 730xd machines (but they are
  all identical, so that could mean it's something like their specific
  ram configuration is causing the allocator to hit this on these
  machine but not on other machines in the cluster, I don't want to say
  it's necessarily the model of chipset or CPU, there are other bits of
  identicalness between these machines)"

Link: http://lkml.kernel.org/r/499c0f6cc10d6eb829a67f2a4d75b4228a9b356e.1501695897.git.jtoppins@redhat.com
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
Reviewed-by: Doug Ledford <dledford@redhat.com>
Tested-by: Doug Ledford <dledford@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 626a430e32d1..6d00f746c2fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7669,7 +7669,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, false)) {
-		pr_info("%s: [%lx, %lx) PFNs busy\n",
+		pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
 			__func__, outer_start, end);
 		ret = -EBUSY;
 		goto done;

From 5af10dfd0afc559bb4b0f7e3e8227a1578333995 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 10 Aug 2017 15:23:38 -0700
Subject: [PATCH 251/281] userfaultfd: hugetlbfs: remove superfluous page
 unlock in VM_SHARED case

huge_add_to_page_cache->add_to_page_cache implicitly unlocks the page
before returning in case of errors.

The error returned was -EEXIST by running UFFDIO_COPY on a non-hole
offset of a VM_SHARED hugetlbfs mapping.  It was an userland bug that
triggered it and the kernel must cope with it returning -EEXIST from
ioctl(UFFDIO_COPY) as expected.

  page dumped because: VM_BUG_ON_PAGE(!PageLocked(page))
  kernel BUG at mm/filemap.c:964!
  invalid opcode: 0000 [#1] SMP
  CPU: 1 PID: 22582 Comm: qemu-system-x86 Not tainted 4.11.11-300.fc26.x86_64 #1
  RIP: unlock_page+0x4a/0x50
  Call Trace:
    hugetlb_mcopy_atomic_pte+0xc0/0x320
    mcopy_atomic+0x96f/0xbe0
    userfaultfd_ioctl+0x218/0xe90
    do_vfs_ioctl+0xa5/0x600
    SyS_ioctl+0x79/0x90
    entry_SYSCALL_64_fastpath+0x1a/0xa9

Link: http://lkml.kernel.org/r/20170802165145.22628-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Tested-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1a0ac0ad6f6..31e207cb399b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4062,9 +4062,9 @@ out:
 	return ret;
 out_release_unlock:
 	spin_unlock(ptl);
-out_release_nounlock:
 	if (vm_shared)
 		unlock_page(page);
+out_release_nounlock:
 	put_page(page);
 	goto out;
 }

From a4afe8cdec1646c3d258b02d1cfdfb1313b76eb1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 10 Aug 2017 15:23:40 -0700
Subject: [PATCH 252/281] test_kmod: fix spelling mistake: "EMTPY" -> "EMPTY"

Trivial fix to spelling mistake in snprintf text

[mcgrof@kernel.org: massaged commit message]
Link: http://lkml.kernel.org/r/20170802211450.27928-3-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Michal Marek <mmarek@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Binderman <dcb314@hotmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 6c1d678bcf8b..90c91541fc16 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -485,7 +485,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_driver);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"driver:\tEMTPY\n");
+				"driver:\tEMPTY\n");
 
 	if (config->test_fs)
 		len += snprintf(buf+len, PAGE_SIZE - len,
@@ -493,7 +493,7 @@ static ssize_t config_show(struct device *dev,
 				config->test_fs);
 	else
 		len += snprintf(buf+len, PAGE_SIZE - len,
-				"fs:\tEMTPY\n");
+				"fs:\tEMPTY\n");
 
 	mutex_unlock(&test_dev->config_mutex);
 

From 434b06ae23bab4f4111a674f9b64409c87fa8df9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Thu, 10 Aug 2017 15:23:44 -0700
Subject: [PATCH 253/281] test_kmod: fix bug which allows negative values on
 two config options

Parsing with kstrtol() enables values to be negative, and we failed to
check for negative values when parsing with test_dev_config_update_uint_sync()
or test_dev_config_update_uint_range().

test_dev_config_update_uint_range() has a minimum check though so an
issue is not present there.  test_dev_config_update_uint_sync() is only
used for the number of threads to use (config_num_threads_store()), and
indeed this would fail with an attempt for a large allocation.

Although the issue is only present in practice with the first fix both
by using kstrtoul() instead of kstrtol().

Link: http://lkml.kernel.org/r/20170802211450.27928-4-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Binderman <dcb314@hotmail.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 90c91541fc16..8fc0a7a19c83 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -880,10 +880,10 @@ static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev,
 					    int (*test_sync)(struct kmod_test_device *test_dev))
 {
 	int ret;
-	long new;
+	unsigned long new;
 	unsigned int old_val;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 
@@ -918,9 +918,9 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev,
 					     unsigned int max)
 {
 	int ret;
-	long new;
+	unsigned long new;
 
-	ret = kstrtol(buf, 10, &new);
+	ret = kstrtoul(buf, 10, &new);
 	if (ret)
 		return ret;
 

From 9c56771316ef50992ada284b4c01b03842b2660d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Aug 2017 15:23:47 -0700
Subject: [PATCH 254/281] test_kmod: fix the lock in register_test_dev_kmod()

We accidentally just drop the lock twice instead of taking it and then
releasing it.  This isn't a big issue unless you are adding more than
one device to test on, and the kmod.sh doesn't do that yet, however this
obviously is the correct thing to do.

[mcgrof@kernel.org: massaged subject, explain what happens]
Link: http://lkml.kernel.org/r/20170802211450.27928-5-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Binderman <dcb314@hotmail.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 8fc0a7a19c83..1bc06bbfc97a 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1146,7 +1146,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
 	struct kmod_test_device *test_dev = NULL;
 	int ret;
 
-	mutex_unlock(&reg_dev_mutex);
+	mutex_lock(&reg_dev_mutex);
 
 	/* int should suffice for number of devices, test for wrap */
 	if (unlikely(num_test_devs + 1) < 0) {

From 4e98ebe5f435fbe5bca91c24bc5d5a2b33025e08 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Aug 2017 15:23:50 -0700
Subject: [PATCH 255/281] test_kmod: fix small memory leak on filesystem tests

The break was in the wrong place so file system tests don't work as
intended, leaking memory at each test switch.

[mcgrof@kernel.org: massaged commit subject, noted memory leak issue without the fix]
Link: http://lkml.kernel.org/r/20170802211450.27928-6-mcgrof@kernel.org
Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reported-by: David Binderman <dcb314@hotmail.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jessica Yu <jeyu@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michal Marek <mmarek@suse.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 1bc06bbfc97a..ff9148969b92 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -746,11 +746,11 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev,
 						      strlen(test_str));
 		break;
 	case TEST_KMOD_FS_TYPE:
-		break;
 		kfree_const(config->test_fs);
 		config->test_driver = NULL;
 		copied = config_copy_test_fs(config, test_str,
 					     strlen(test_str));
+		break;
 	default:
 		mutex_unlock(&test_dev->config_mutex);
 		return -EINVAL;

From 9eeb52ae712e72141c4c1d173048a606ba8f42f6 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 10 Aug 2017 15:23:53 -0700
Subject: [PATCH 256/281] fault-inject: fix wrong should_fail() decision in
 task context

Commit 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth")
unintentionally broke a conditional statement in should_fail().  Any
faults are not injected in the task context by the change when the
systematic fault injection is not used.

This change restores to the previous correct behaviour.

Link: http://lkml.kernel.org/r/1501633700-3488-1-git-send-email-akinobu.mita@gmail.com
Fixes: 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth")
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Reported-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Tested-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/fault-inject.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 7d315fdb9f13..cf7b129b0b2b 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -110,10 +110,12 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (in_task()) {
 		unsigned int fail_nth = READ_ONCE(current->fail_nth);
 
-		if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
-			goto fail;
+		if (fail_nth) {
+			if (!WRITE_ONCE(current->fail_nth, fail_nth - 1))
+				goto fail;
 
-		return false;
+			return false;
+		}
 	}
 
 	/* No need to check any other properties if the probability is 0 */

From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001
From: Nadav Amit <nadav.amit@gmail.com>
Date: Thu, 10 Aug 2017 15:23:56 -0700
Subject: [PATCH 257/281] mm: migrate: prevent racy access to tlb_flush_pending

Patch series "fixes of TLB batching races", v6.

It turns out that Linux TLB batching mechanism suffers from various
races.  Races that are caused due to batching during reclamation were
recently handled by Mel and this patch-set deals with others.  The more
fundamental issue is that concurrent updates of the page-tables allow
for TLB flushes to be batched on one core, while another core changes
the page-tables.  This other core may assume a PTE change does not
require a flush based on the updated PTE value, while it is unaware that
TLB flushes are still pending.

This behavior affects KSM (which may result in memory corruption) and
MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior).  A
proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED.
Memory corruption in KSM is harder to produce in practice, but was
observed by hacking the kernel and adding a delay before flushing and
replacing the KSM page.

Finally, there is also one memory barrier missing, which may affect
architectures with weak memory model.

This patch (of 7):

Setting and clearing mm->tlb_flush_pending can be performed by multiple
threads, since mmap_sem may only be acquired for read in
task_numa_work().  If this happens, tlb_flush_pending might be cleared
while one of the threads still changes PTEs and batches TLB flushes.

This can lead to the same race between migration and
change_protection_range() that led to the introduction of
tlb_flush_pending.  The result of this race was data corruption, which
means that this patch also addresses a theoretically possible data
corruption.

An actual data corruption was not observed, yet the race was was
confirmed by adding assertion to check tlb_flush_pending is not set by
two threads, adding artificial latency in change_protection_range() and
using sysctl to reduce kernel.numa_balancing_scan_delay_ms.

Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com
Fixes: 20841405940e ("mm: fix TLB flush race between migration, and
change_protection_range")
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 31 ++++++++++++++++++++++---------
 kernel/fork.c            |  2 +-
 mm/debug.c               |  2 +-
 mm/mprotect.c            |  4 ++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f384bb62d8e..f58f76ee1dfa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -493,7 +493,7 @@ struct mm_struct {
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
-	bool tlb_flush_pending;
+	atomic_t tlb_flush_pending;
 #endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
@@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	return mm->tlb_flush_pending;
+	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
-	mm->tlb_flush_pending = true;
+	atomic_set(&mm->tlb_flush_pending, 0);
+}
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_inc(&mm->tlb_flush_pending);
 
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * Guarantee that the tlb_flush_pending increase does not leak into the
 	 * critical section updating the page tables
 	 */
 	smp_mb__before_spinlock();
 }
+
 /* Clearing is done after a TLB flush, which also provides a barrier. */
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	mm->tlb_flush_pending = false;
+	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	return false;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 }
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 }
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 17921b0390b4..e075b7780421 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 	mmu_notifier_mm_init(mm);
-	clear_tlb_flush_pending(mm);
+	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif
diff --git a/mm/debug.c b/mm/debug.c
index db1cd26d8752..d70103bb4731 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
-		mm->tlb_flush_pending,
+		atomic_read(&mm->tlb_flush_pending),
 #endif
 		mm->def_flags, &mm->def_flags
 	);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4180ad8cc9c5..bd0f409922cb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
-	set_tlb_flush_pending(mm);
+	inc_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
-	clear_tlb_flush_pending(mm);
+	dec_tlb_flush_pending(mm);
 
 	return pages;
 }

From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 10 Aug 2017 15:23:59 -0700
Subject: [PATCH 258/281] mm: migrate: fix barriers around tlb_flush_pending

Reading tlb_flush_pending while the page-table lock is taken does not
require a barrier, since the lock/unlock already acts as a barrier.
Removing the barrier in mm_tlb_flush_pending() to address this issue.

However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending()
while the page-table lock is already released, which may present a
problem on architectures with weak memory model (PPC).  To deal with
this case, a new parameter is added to mm_tlb_flush_pending() to
indicate if it is read without the page-table lock taken, and calling
smp_mb__after_unlock_lock() in this case.

Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f58f76ee1dfa..0e478ebd2706 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
- * The barriers below prevent the compiler from re-ordering the instructions
- * around the memory barriers that are already present in the code.
+ * The barriers are used to ensure the order between tlb_flush_pending updates,
+ * which happen while the lock is not taken, and the PTE updates, which happen
+ * while the lock is taken, are serialized.
  */
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
@@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 /* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/*
+	 * Guarantee that the tlb_flush_pending does not not leak into the
+	 * critical section, since we must order the PTE change and changes to
+	 * the pending TLB flush indication. We could have relied on TLB flush
+	 * as a memory barrier, but this behavior is not clearly documented.
+	 */
+	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
 #else

From a9b802500ebbff1544519a2969323b719dac21f0 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 10 Aug 2017 15:24:02 -0700
Subject: [PATCH 259/281] Revert "mm: numa: defer TLB flush for THP migration
 as long as possible"

While deferring TLB flushes is a good practice, the reverted patch
caused pending TLB flushes to be checked while the page-table lock is
not taken.  As a result, in architectures with weak memory model (PPC),
Linux may miss a memory-barrier, miss the fact TLB flushes are pending,
and cause (in theory) a memory corruption.

Since the alternative of using smp_mb__after_unlock_lock() was
considered a bit open-coded, and the performance impact is expected to
be small, the previous patch is reverted.

This reverts b0943d61b8fa ("mm: numa: defer TLB flush for THP migration
as long as possible").

Link: http://lkml.kernel.org/r/20170802000818.4760-4-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Suggested-by: Mel Gorman <mgorman@suse.de>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 7 +++++++
 mm/migrate.c     | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86975dec0ba1..216114f6ef0b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1495,6 +1495,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 		goto clear_pmdnuma;
 	}
 
+	/*
+	 * The page_table_lock above provides a memory barrier
+	 * with change_protection_range.
+	 */
+	if (mm_tlb_flush_pending(vma->vm_mm))
+		flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and access rights restored.
diff --git a/mm/migrate.c b/mm/migrate.c
index 627671551873..d68a41da6abb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		put_page(new_page);
 		goto out_fail;
 	}
-	/*
-	 * We are not sure a pending tlb flush here is for a huge page
-	 * mapping or not. Hence use the tlb range variant
-	 */
-	if (mm_tlb_flush_pending(mm))
-		flush_tlb_range(vma, mmun_start, mmun_end);
 
 	/* Prepare a page as a migration target */
 	__SetPageLocked(new_page);

From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:05 -0700
Subject: [PATCH 260/281] mm: refactor TLB gathering API

This patch is a preparatory patch for solving race problems caused by
TLB batch.  For that, we will increase/decrease TLB flush pending count
of mm_struct whenever tlb_[gather|finish]_mmu is called.

Before making it simple, this patch separates architecture specific part
and rename it to arch_tlb_[gather|finish]_mmu and generic part just
calls it.

It shouldn't change any behavior.

Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h | 12 ++++++------
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  8 +++++---
 include/asm-generic/tlb.h   |  7 ++++---
 include/linux/mm_types.h    |  6 ++++++
 mm/memory.c                 | 28 +++++++++++++++++++++-------
 8 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3f2eb76243e3..7f5b2a2d3861 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->fullmm = !(start | (end+1));
@@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index fced197b9626..93cadc04ac62 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
@@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
  * collected.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7317b3108a88..d574d0820dc8 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -47,10 +47,9 @@ struct mmu_table_batch {
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
-static inline void tlb_gather_mmu(struct mmu_gather *tlb,
-				  struct mm_struct *mm,
-				  unsigned long start,
-				  unsigned long end)
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-static inline void tlb_finish_mmu(struct mmu_gather *tlb,
-				  unsigned long start, unsigned long end)
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 }
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 46e0d635e36f..89786560dbd4 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	if (tlb->fullmm)
 		flush_tlb_mm(tlb->mm);
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 600a2e9bfee2..2a901eca7145 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-/* tlb_finish_mmu
+/* arch_tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8afa4335e5b2..8f71521e7a44 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,10 +112,11 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
+void arch_tlb_gather_mmu(struct mmu_gather *tlb,
+	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
-							unsigned long end);
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			 unsigned long start, unsigned long end);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e478ebd2706..c605f2a3a68e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
+struct mmu_gather;
+extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end);
+extern void tlb_finish_mmu(struct mmu_gather *tlb,
+				unsigned long start, unsigned long end);
+
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
diff --git a/mm/memory.c b/mm/memory.c
index f65beaad319b..34cba5113e06 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
-/* tlb_gather_mmu
- *	Called to initialize an (on-stack) mmu_gather structure for page-table
- *	tear-down from @mm. The @fullmm argument is used when @mm is without
- *	users and we're going to destroy the full address space (exit/execve).
- */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
@@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	struct mmu_gather_batch *batch, *next;
 
@@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
+/* tlb_gather_mmu
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
+ */
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
+{
+	arch_tlb_gather_mmu(tlb, mm, start, end);
+}
+
+void tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
+{
+	arch_tlb_finish_mmu(tlb, start, end);
+}
+
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.

From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:09 -0700
Subject: [PATCH 261/281] mm: make tlb_flush_pending global

Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING|
COMPACTION] but upcoming patches to solve subtle TLB flush batching
problem will use it regardless of compaction/NUMA so this patch doesn't
remove the dependency.

[akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement]
Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 21 ---------------------
 mm/debug.c               |  4 ----
 2 files changed, 25 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c605f2a3a68e..892a7b0196fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -487,14 +487,12 @@ struct mm_struct {
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 	/*
 	 * An operation with batched TLB flushing is going on. Anything that
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
 	atomic_t tlb_flush_pending;
-#endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
 	bool tlb_flush_batched;
@@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 extern void tlb_finish_mmu(struct mmu_gather *tlb,
 				unsigned long start, unsigned long end);
 
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
@@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
-#else
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	return false;
-}
-
-static inline void init_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void inc_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void dec_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-#endif
 
 struct vm_fault;
 
diff --git a/mm/debug.c b/mm/debug.c
index d70103bb4731..5715448ab0b5 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		"tlb_flush_pending %d\n"
-#endif
 		"def_flags: %#lx(%pGv)\n",
 
 		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
@@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		atomic_read(&mm->tlb_flush_pending),
-#endif
 		mm->def_flags, &mm->def_flags
 	);
 }

From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:12 -0700
Subject: [PATCH 262/281] mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem

Nadav reported parallel MADV_DONTNEED on same range has a stale TLB
problem and Mel fixed it[1] and found same problem on MADV_FREE[2].

Quote from Mel Gorman:
 "The race in question is CPU 0 running madv_free and updating some PTEs
  while CPU 1 is also running madv_free and looking at the same PTEs.
  CPU 1 may have writable TLB entries for a page but fail the pte_dirty
  check (because CPU 0 has updated it already) and potentially fail to
  flush.

  Hence, when madv_free on CPU 1 returns, there are still potentially
  writable TLB entries and the underlying PTE is still present so that a
  subsequent write does not necessarily propagate the dirty bit to the
  underlying PTE any more. Reclaim at some unknown time at the future
  may then see that the PTE is still clean and discard the page even
  though a write has happened in the meantime. I think this is possible
  but I could have missed some protection in madv_free that prevents it
  happening."

This patch aims for solving both problems all at once and is ready for
other problem with KSM, MADV_FREE and soft-dirty story[3].

TLB batch API(tlb_[gather|finish]_mmu] uses [inc|dec]_tlb_flush_pending
and mmu_tlb_flush_pending so that when tlb_finish_mmu is called, we can
catch there are parallel threads going on.  In that case, forcefully,
flush TLB to prevent for user to access memory via stale TLB entry
although it fail to gather page table entry.

I confirmed this patch works with [4] test program Nadav gave so this
patch supersedes "mm: Always flush VMA ranges affected by zap_page_range
v2" in current mmotm.

NOTE:

This patch modifies arch-specific TLB gathering interface(x86, ia64,
s390, sh, um).  It seems most of architecture are straightforward but
s390 need to be careful because tlb_flush_mmu works only if
mm->context.flush_mm is set to non-zero which happens only a pte entry
really is cleared by ptep_get_and_clear and friends.  However, this
problem never changes the pte entries but need to flush to prevent
memory access from stale tlb.

[1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net
[2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de
[3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com
[4] https://patchwork.kernel.org/patch/9861621/

[minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu]
  Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox
Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reported-by: Nadav Amit <namit@vmware.com>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  7 ++++++-
 arch/ia64/include/asm/tlb.h |  4 +++-
 arch/s390/include/asm/tlb.h |  7 ++++++-
 arch/sh/include/asm/tlb.h   |  4 ++--
 arch/um/include/asm/tlb.h   |  7 ++++++-
 include/asm-generic/tlb.h   |  2 +-
 include/linux/mm_types.h    |  8 ++++++++
 mm/memory.c                 | 18 ++++++++++++++++--
 8 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 7f5b2a2d3861..d5562f9ce600 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->range_start = start;
+		tlb->range_end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 93cadc04ac62..cbe5ac3699bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force)
+		tlb->need_flush = 1;
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index d574d0820dc8..2eb8ff0d6fca 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 89786560dbd4..51a8bc967e75 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
-	if (tlb->fullmm)
+	if (tlb->fullmm || force)
 		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 2a901eca7145..344d95619d03 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+		tlb->need_flush = 1;
+	}
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8f71521e7a44..faddde44de8c 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb,
 	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			 unsigned long start, unsigned long end);
+			 unsigned long start, unsigned long end, bool force);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 892a7b0196fd..3cadee0a3508 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
+/*
+ * Returns true if there are two above TLB batching threads in parallel.
+ */
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
diff --git a/mm/memory.c b/mm/memory.c
index 34cba5113e06..e158f7ac6730 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	that were required.
  */
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
 	struct mmu_gather_batch *batch, *next;
 
+	if (force)
+		__tlb_adjust_range(tlb, start, end - start);
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
 	arch_tlb_gather_mmu(tlb, mm, start, end);
+	inc_tlb_flush_pending(tlb->mm);
 }
 
 void tlb_finish_mmu(struct mmu_gather *tlb,
 		unsigned long start, unsigned long end)
 {
-	arch_tlb_finish_mmu(tlb, start, end);
+	/*
+	 * If there are parallel threads are doing PTE changes on same range
+	 * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
+	 * flush by batching, a thread has stable TLB entry can fail to flush
+	 * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
+	 * forcefully if we detect parallel PTE batching threads.
+	 */
+	bool force = mm_tlb_flush_nested(tlb->mm);
+
+	arch_tlb_finish_mmu(tlb, start, end, force);
+	dec_tlb_flush_pending(tlb->mm);
 }
 
 /*

From b3a81d0841a954a3d3e782059960f53e63b37066 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:15 -0700
Subject: [PATCH 263/281] mm: fix KSM data corruption

Nadav reported KSM can corrupt the user data by the TLB batching
race[1].  That means data user written can be lost.

Quote from Nadav Amit:
 "For this race we need 4 CPUs:

  CPU0: Caches a writable and dirty PTE entry, and uses the stale value
  for write later.

  CPU1: Runs madvise_free on the range that includes the PTE. It would
  clear the dirty-bit. It batches TLB flushes.

  CPU2: Writes 4 to /proc/PID/clear_refs , clearing the PTEs soft-dirty.
  We care about the fact that it clears the PTE write-bit, and of
  course, batches TLB flushes.

  CPU3: Runs KSM. Our purpose is to pass the following test in
  write_protect_page():

	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)))

  Since it will avoid TLB flush. And we want to do it while the PTE is
  stale. Later, and before replacing the page, we would be able to
  change the page.

  Note that all the operations the CPU1-3 perform canhappen in parallel
  since they only acquire mmap_sem for read.

  We start with two identical pages. Everything below regards the same
  page/PTE.

  CPU0        CPU1        CPU2        CPU3
  ----        ----        ----        ----
  Write the same
  value on page

  [cache PTE as
   dirty in TLB]

              MADV_FREE
              pte_mkclean()

                          4 > clear_refs
                          pte_wrprotect()

                                      write_protect_page()
                                      [ success, no flush ]

                                      pages_indentical()
                                      [ ok ]

  Write to page
  different value

  [Ok, using stale
   PTE]

                                      replace_page()

  Later, CPU1, CPU2 and CPU3 would flush the TLB, but that is too late.
  CPU0 already wrote on the page, but KSM ignored this write, and it got
  lost"

In above scenario, MADV_FREE is fixed by changing TLB batching API
including [set|clear]_tlb_flush_pending.  Remained thing is soft-dirty
part.

This patch changes soft-dirty uses TLB batching API instead of
flush_tlb_mm and KSM checks pending TLB flush by using
mm_tlb_flush_pending so that it will flush TLB to avoid data lost if
there are other parallel threads pending TLB flush.

[1] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com

Link: http://lkml.kernel.org/r/20170802000818.4760-8-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reported-by: Nadav Amit <namit@vmware.com>
Tested-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 7 +++++--
 mm/ksm.c           | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b836fd61ed87..fe8f3265e877 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -16,9 +16,10 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
+#include <linux/uaccess.h>
 
 #include <asm/elf.h>
-#include <linux/uaccess.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -1008,6 +1009,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	enum clear_refs_types type;
+	struct mmu_gather tlb;
 	int itype;
 	int rv;
 
@@ -1054,6 +1056,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		}
 
 		down_read(&mm->mmap_sem);
+		tlb_gather_mmu(&tlb, mm, 0, -1);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
@@ -1075,7 +1078,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(mm, 0, -1);
-		flush_tlb_mm(mm);
+		tlb_finish_mmu(&tlb, 0, -1);
 		up_read(&mm->mmap_sem);
 out_mm:
 		mmput(mm);
diff --git a/mm/ksm.c b/mm/ksm.c
index 4dc92f138786..db20f8436bc3 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1038,7 +1038,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 		goto out_unlock;
 
 	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
-	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) {
+	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
+						mm_tlb_flush_pending(mm)) {
 		pte_t entry;
 
 		swapped = PageSwapCache(page);

From c0a6a5ae6b5d976592a83bdafc1c2f49b0718c65 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 10 Aug 2017 15:24:18 -0700
Subject: [PATCH 264/281] MAINTAINERS: copy virtio on balloon_compaction.c

Changes to mm/balloon_compaction.c can easily break virtio, and virtio
is the only user of that interface.  Add a line to MAINTAINERS so
whoever changes that file remembers to copy us.

Link: http://lkml.kernel.org/r/1501764010-24456-1-git-send-email-mst@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 84d6a8277cbd..6f7721d1634c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14004,6 +14004,7 @@ F:	drivers/block/virtio_blk.c
 F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
+F:	mm/balloon_compaction.c
 
 VIRTIO CRYPTO DRIVER
 M:	Gonglei <arei.gonglei@huawei.com>

From af54aed94bf3a1cf0b847bbbf00f9a58e278b338 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Thu, 10 Aug 2017 15:24:21 -0700
Subject: [PATCH 265/281] mm/balloon_compaction.c: don't zero ballooned pages

Revert commit bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page
to balloon device")'

Zeroing ballon pages is rather time consuming, especially when a lot of
pages are in flight. E.g. 7GB worth of ballooned memory takes 2.8s with
__GFP_ZERO while it takes ~491ms without it.

The original commit argued that zeroing will help ksmd to merge these
pages on the host but this argument is assuming that the host actually
marks balloon pages for ksm which is not universally true.  So we pay
performance penalty for something that even might not be used in the end
which is wrong.  The host can zero out pages on its own when there is a
need.

[mhocko@kernel.org: new changelog text]
Link: http://lkml.kernel.org/r/1501761557-9758-1-git-send-email-wei.w.wang@intel.com
Fixes: bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page to balloon device")
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: zhenwei.pi <zhenwei.pi@youruncloud.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/balloon_compaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 9075aa54e955..b06d9fe23a28 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 {
 	unsigned long flags;
 	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-				__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO);
+				       __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!page)
 		return NULL;
 

From d041353dc98a6339182cd6f628b4c8f111278cb3 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 10 Aug 2017 15:24:24 -0700
Subject: [PATCH 266/281] mm: fix list corruptions on shmem shrinklist

We saw many list corruption warnings on shmem shrinklist:

  WARNING: CPU: 18 PID: 177 at lib/list_debug.c:59 __list_del_entry+0x9e/0xc0
  list_del corruption. prev->next should be ffff9ae5694b82d8, but was ffff9ae5699ba960
  Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt
  CPU: 18 PID: 177 Comm: kswapd1 Not tainted 4.9.34-t3.el7.twitter.x86_64 #1
  Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013
  Call Trace:
    dump_stack+0x4d/0x66
    __warn+0xcb/0xf0
    warn_slowpath_fmt+0x4f/0x60
    __list_del_entry+0x9e/0xc0
    shmem_unused_huge_shrink+0xfa/0x2e0
    shmem_unused_huge_scan+0x20/0x30
    super_cache_scan+0x193/0x1a0
    shrink_slab.part.41+0x1e3/0x3f0
    shrink_slab+0x29/0x30
    shrink_node+0xf9/0x2f0
    kswapd+0x2d8/0x6c0
    kthread+0xd7/0xf0
    ret_from_fork+0x22/0x30

  WARNING: CPU: 23 PID: 639 at lib/list_debug.c:33 __list_add+0x89/0xb0
  list_add corruption. prev->next should be next (ffff9ae5699ba960), but was ffff9ae5694b82d8. (prev=ffff9ae5694b82d8).
  Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt
  CPU: 23 PID: 639 Comm: systemd-udevd Tainted: G        W       4.9.34-t3.el7.twitter.x86_64 #1
  Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013
  Call Trace:
    dump_stack+0x4d/0x66
    __warn+0xcb/0xf0
    warn_slowpath_fmt+0x4f/0x60
    __list_add+0x89/0xb0
    shmem_setattr+0x204/0x230
    notify_change+0x2ef/0x440
    do_truncate+0x5d/0x90
    path_openat+0x331/0x1190
    do_filp_open+0x7e/0xe0
    do_sys_open+0x123/0x200
    SyS_open+0x1e/0x20
    do_syscall_64+0x61/0x170
    entry_SYSCALL64_slow_path+0x25/0x25

The problem is that shmem_unused_huge_shrink() moves entries from the
global sbinfo->shrinklist to its local lists and then releases the
spinlock.  However, a parallel shmem_setattr() could access one of these
entries directly and add it back to the global shrinklist if it is
removed, with the spinlock held.

The logic itself looks solid since an entry could be either in a local
list or the global list, otherwise it is removed from one of them by
list_del_init().  So probably the race condition is that, one CPU is in
the middle of INIT_LIST_HEAD() but the other CPU calls list_empty()
which returns true too early then the following list_add_tail() sees a
corrupted entry.

list_empty_careful() is designed to fix this situation.

[akpm@linux-foundation.org: add comments]
Link: http://lkml.kernel.org/r/20170803054630.18775-1-xiyou.wangcong@gmail.com
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index b0aa6075d164..6540e5982444 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1022,7 +1022,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 			 */
 			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
 				spin_lock(&sbinfo->shrinklist_lock);
-				if (list_empty(&info->shrinklist)) {
+				/*
+				 * _careful to defend against unlocked access to
+				 * ->shrink_list in shmem_unused_huge_shrink()
+				 */
+				if (list_empty_careful(&info->shrinklist)) {
 					list_add_tail(&info->shrinklist,
 							&sbinfo->shrinklist);
 					sbinfo->shrinklist_len++;
@@ -1817,7 +1821,11 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 			 * to shrink under memory pressure.
 			 */
 			spin_lock(&sbinfo->shrinklist_lock);
-			if (list_empty(&info->shrinklist)) {
+			/*
+			 * _careful to defend against unlocked access to
+			 * ->shrink_list in shmem_unused_huge_shrink()
+			 */
+			if (list_empty_careful(&info->shrinklist)) {
 				list_add_tail(&info->shrinklist,
 						&sbinfo->shrinklist);
 				sbinfo->shrinklist_len++;

From aac2fea94f7a3df8ad1eeb477eb2643f81fd5393 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 10 Aug 2017 15:24:27 -0700
Subject: [PATCH 267/281] rmap: do not call mmu_notifier_invalidate_page()
 under ptl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MMU notifiers can sleep, but in page_mkclean_one() we call
mmu_notifier_invalidate_page() under page table lock.

Let's instead use mmu_notifier_invalidate_range() outside
page_vma_mapped_walk() loop.

[jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl]
  Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com
Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com
Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reported-by: axie <axie@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Writer, Tim" <Tim.Writer@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 52 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index c8993c63eb25..c1286d47aa1f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		.flags = PVMW_SYNC,
 	};
 	int *cleaned = arg;
+	bool invalidation_needed = false;
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		int ret = 0;
-		address = pvmw.address;
 		if (pvmw.pte) {
 			pte_t entry;
 			pte_t *pte = pvmw.pte;
@@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pte_dirty(*pte) && !pte_write(*pte))
 				continue;
 
-			flush_cache_page(vma, address, pte_pfn(*pte));
-			entry = ptep_clear_flush(vma, address, pte);
+			flush_cache_page(vma, pvmw.address, pte_pfn(*pte));
+			entry = ptep_clear_flush(vma, pvmw.address, pte);
 			entry = pte_wrprotect(entry);
 			entry = pte_mkclean(entry);
-			set_pte_at(vma->vm_mm, address, pte, entry);
+			set_pte_at(vma->vm_mm, pvmw.address, pte, entry);
 			ret = 1;
 		} else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
 				continue;
 
-			flush_cache_page(vma, address, page_to_pfn(page));
-			entry = pmdp_huge_clear_flush(vma, address, pmd);
+			flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+			entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd);
 			entry = pmd_wrprotect(entry);
 			entry = pmd_mkclean(entry);
-			set_pmd_at(vma->vm_mm, address, pmd, entry);
+			set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry);
 			ret = 1;
 #else
 			/* unexpected pmd-mapped page? */
@@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (ret) {
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
 			(*cleaned)++;
+			invalidation_needed = true;
 		}
 	}
 
+	if (invalidation_needed) {
+		mmu_notifier_invalidate_range(vma->vm_mm, address,
+				address + (1UL << compound_order(page)));
+	}
+
 	return true;
 }
 
@@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	};
 	pte_t pteval;
 	struct page *subpage;
-	bool ret = true;
+	bool ret = true, invalidation_needed = false;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
@@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE(!pvmw.pte, page);
 
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
 
 		if (!(flags & TTU_IGNORE_ACCESS)) {
-			if (ptep_clear_flush_young_notify(vma, address,
+			if (ptep_clear_flush_young_notify(vma, pvmw.address,
 						pvmw.pte)) {
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		/* Nuke the page table entry. */
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+		flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte));
 		if (should_defer_flush(mm, flags)) {
 			/*
 			 * We clear the PTE but do not flush so potentially
@@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * transition on a cached TLB entry is written through
 			 * and traps if the PTE is unmapped.
 			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+			pteval = ptep_get_and_clear(mm, pvmw.address,
+						    pvmw.pte);
 
 			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
 		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
+			pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
 		}
 
 		/* Move the dirty bit to the page. Now the pte is gone. */
@@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			if (PageHuge(page)) {
 				int nr = 1 << compound_order(page);
 				hugetlb_count_sub(nr, mm);
-				set_huge_swap_pte_at(mm, address,
+				set_huge_swap_pte_at(mm, pvmw.address,
 						     pvmw.pte, pteval,
 						     vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 			}
 
 		} else if (pte_unused(pteval)) {
@@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else if (PageAnon(page)) {
 			swp_entry_t entry = { .val = page_private(subpage) };
 			pte_t swp_pte;
@@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				 * If the page was redirtied, it cannot be
 				 * discarded. Remap the page to page table.
 				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				SetPageSwapBacked(page);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
@@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			}
 
 			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
+				set_pte_at(mm, pvmw.address, pvmw.pte, pteval);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);
 				break;
@@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
+			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 		} else
 			dec_mm_counter(mm, mm_counter_file(page));
 discard:
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
-		mmu_notifier_invalidate_page(mm, address);
+		invalidation_needed = true;
 	}
+
+	if (invalidation_needed)
+		mmu_notifier_invalidate_range(mm, address,
+				address + (1UL << compound_order(page)));
 	return ret;
 }
 

From f357e345eef7863da037e0243f2d3df4ba6df986 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 10 Aug 2017 15:24:29 -0700
Subject: [PATCH 268/281] zram: rework copy of compressor name in
 comp_algorithm_store()

comp_algorithm_store() passes the size of the source buffer to strlcpy()
instead of the destination buffer size.  Make it explicit that the two
buffers have the same size and use strcpy() instead of strlcpy().  The
latter can be done safely since the function ensures that the string in
the source buffer is terminated.

Link: http://lkml.kernel.org/r/20170803163350.45245-1-mka@chromium.org
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 856d5dc02451..3b1b6340ba13 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -308,7 +308,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
-	char compressor[CRYPTO_MAX_ALG_NAME];
+	char compressor[ARRAY_SIZE(zram->compressor)];
 	size_t sz;
 
 	strlcpy(compressor, buf, sizeof(compressor));
@@ -327,7 +327,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		return -EBUSY;
 	}
 
-	strlcpy(zram->compressor, compressor, sizeof(compressor));
+	strcpy(zram->compressor, compressor);
 	up_write(&zram->init_lock);
 	return len;
 }

From e86b298bebf7e799e4b7232e9135799f1947552e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 10 Aug 2017 15:24:32 -0700
Subject: [PATCH 269/281] userfaultfd: replace ENOSPC with ESRCH in case mm has
 gone during copy/zeropage

When the process exit races with outstanding mcopy_atomic, it would be
better to return ESRCH error.  When such race occurs the process and
it's mm are going away and returning "no such process" to the uffd
monitor seems better fit than ENOSPC.

Link: http://lkml.kernel.org/r/1502111545-32305-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 06ea26b8c996..b0d5897bc4e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1600,7 +1600,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 				   uffdio_copy.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
 		return -EFAULT;
@@ -1647,7 +1647,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 				     uffdio_zeropage.range.len);
 		mmput(ctx->mm);
 	} else {
-		return -ENOSPC;
+		return -ESRCH;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
 		return -EFAULT;

From 9183976ef1c858c289b09066fd57aae51b86653c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 25 May 2017 06:57:50 -0400
Subject: [PATCH 270/281] fuse: set mapping error in writepage_locked when it
 fails

This ensures that we see errors on fsync when writeback fails.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 810ed4f99e38..ab60051be6e5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1669,6 +1669,7 @@ err_nofile:
 err_free:
 	fuse_request_free(req);
 err:
+	mapping_set_error(page->mapping, error);
 	end_page_writeback(page);
 	return error;
 }

From c138d81163d82db044dcaf1141395713f03bf0bf Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 28 Jul 2017 12:23:12 +0200
Subject: [PATCH 271/281] x86: provide an init_mem_mapping hypervisor hook

Provide a hook in hypervisor_x86 called after setting up initial
memory mapping.

This is needed e.g. by Xen HVM guests to map the hypervisor shared
info page.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/hypervisor.h | 10 ++++++++++
 arch/x86/mm/init.c                |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 21126155a739..0ead9dbb9130 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -43,6 +43,9 @@ struct hypervisor_x86 {
 
 	/* pin current vcpu to specified physical cpu (run rarely) */
 	void		(*pin_vcpu)(int);
+
+	/* called during init_mem_mapping() to setup early mappings. */
+	void		(*init_mem_mapping)(void);
 };
 
 extern const struct hypervisor_x86 *x86_hyper;
@@ -57,8 +60,15 @@ extern const struct hypervisor_x86 x86_hyper_kvm;
 extern void init_hypervisor_platform(void);
 extern bool hypervisor_x2apic_available(void);
 extern void hypervisor_pin_vcpu(int cpu);
+
+static inline void hypervisor_init_mem_mapping(void)
+{
+	if (x86_hyper && x86_hyper->init_mem_mapping)
+		x86_hyper->init_mem_mapping();
+}
 #else
 static inline void init_hypervisor_platform(void) { }
 static inline bool hypervisor_x2apic_available(void) { return false; }
+static inline void hypervisor_init_mem_mapping(void) { }
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 673541eb3b3f..bf3f1065d6ad 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,7 @@
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
 #include <asm/microcode.h>
 #include <asm/kaslr.h>
+#include <asm/hypervisor.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -636,6 +637,8 @@ void __init init_mem_mapping(void)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 
+	hypervisor_init_mem_mapping();
+
 	early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
 }
 

From 10231f69eb039550864ff3eb395da0c63c03ed5f Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 28 Jul 2017 12:23:13 +0200
Subject: [PATCH 272/281] xen: split up xen_hvm_init_shared_info()

Instead of calling xen_hvm_init_shared_info() on boot and resume split
it up into a boot time function searching for the pfn to use and a
mapping function doing the hypervisor mapping call.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_hvm.c | 45 +++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 87d791356ea9..d23531f5f17e 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -21,29 +21,9 @@
 #include "mmu.h"
 #include "smp.h"
 
-void __ref xen_hvm_init_shared_info(void)
+void xen_hvm_init_shared_info(void)
 {
 	struct xen_add_to_physmap xatp;
-	u64 pa;
-
-	if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
-		/*
-		 * Search for a free page starting at 4kB physical address.
-		 * Low memory is preferred to avoid an EPT large page split up
-		 * by the mapping.
-		 * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
-		 * the BIOS used for HVM guests is well behaved and won't
-		 * clobber memory other than the first 4kB.
-		 */
-		for (pa = PAGE_SIZE;
-		     !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
-		     memblock_is_reserved(pa);
-		     pa += PAGE_SIZE)
-			;
-
-		memblock_reserve(pa, PAGE_SIZE);
-		HYPERVISOR_shared_info = __va(pa);
-	}
 
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
@@ -53,6 +33,28 @@ void __ref xen_hvm_init_shared_info(void)
 		BUG();
 }
 
+static void __init reserve_shared_info(void)
+{
+	u64 pa;
+
+	/*
+	 * Search for a free page starting at 4kB physical address.
+	 * Low memory is preferred to avoid an EPT large page split up
+	 * by the mapping.
+	 * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+	 * the BIOS used for HVM guests is well behaved and won't
+	 * clobber memory other than the first 4kB.
+	 */
+	for (pa = PAGE_SIZE;
+	     !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+	     memblock_is_reserved(pa);
+	     pa += PAGE_SIZE)
+		;
+
+	memblock_reserve(pa, PAGE_SIZE);
+	HYPERVISOR_shared_info = __va(pa);
+}
+
 static void __init init_hvm_pv_info(void)
 {
 	int major, minor;
@@ -153,6 +155,7 @@ static void __init xen_hvm_guest_init(void)
 
 	init_hvm_pv_info();
 
+	reserve_shared_info();
 	xen_hvm_init_shared_info();
 
 	/*

From 4ca83dcf4e3bc0c98836dbb97553792ca7ea5429 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 28 Jul 2017 12:23:14 +0200
Subject: [PATCH 273/281] xen: fix hvm guest with kaslr enabled

A Xen HVM guest running with KASLR enabled will die rather soon today
because the shared info page mapping is using va() too early. This was
introduced by commit a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 ("xen:
allocate page for shared info page from low memory").

In order to fix this use early_memremap() to get a temporary virtual
address for shared info until va() can be used safely.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_hvm.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index d23531f5f17e..de503c225ae1 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -12,6 +12,7 @@
 #include <asm/setup.h>
 #include <asm/hypervisor.h>
 #include <asm/e820/api.h>
+#include <asm/early_ioremap.h>
 
 #include <asm/xen/cpuid.h>
 #include <asm/xen/hypervisor.h>
@@ -21,6 +22,8 @@
 #include "mmu.h"
 #include "smp.h"
 
+static unsigned long shared_info_pfn;
+
 void xen_hvm_init_shared_info(void)
 {
 	struct xen_add_to_physmap xatp;
@@ -28,7 +31,7 @@ void xen_hvm_init_shared_info(void)
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
+	xatp.gpfn = shared_info_pfn;
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();
 }
@@ -51,8 +54,16 @@ static void __init reserve_shared_info(void)
 	     pa += PAGE_SIZE)
 		;
 
+	shared_info_pfn = PHYS_PFN(pa);
+
 	memblock_reserve(pa, PAGE_SIZE);
-	HYPERVISOR_shared_info = __va(pa);
+	HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE);
+}
+
+static void __init xen_hvm_init_mem_mapping(void)
+{
+	early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
+	HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
 }
 
 static void __init init_hvm_pv_info(void)
@@ -221,5 +232,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm = {
 	.init_platform          = xen_hvm_guest_init,
 	.pin_vcpu               = xen_pin_vcpu,
 	.x2apic_available       = xen_x2apic_para_available,
+	.init_mem_mapping	= xen_hvm_init_mem_mapping,
 };
 EXPORT_SYMBOL(x86_hyper_xen_hvm);

From 529871bb3c0675d0b425e2070d5a739db097be98 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 28 Jul 2017 16:53:55 +0200
Subject: [PATCH 274/281] xen: avoid deadlock in xenbus

When starting the xenwatch thread a theoretical deadlock situation is
possible:

xs_init() contains:

    task = kthread_run(xenwatch_thread, NULL, "xenwatch");
    if (IS_ERR(task))
        return PTR_ERR(task);
    xenwatch_pid = task->pid;

And xenwatch_thread() does:

    mutex_lock(&xenwatch_mutex);
    ...
    event->handle->callback();
    ...
    mutex_unlock(&xenwatch_mutex);

The callback could call unregister_xenbus_watch() which does:

    ...
    if (current->pid != xenwatch_pid)
        mutex_lock(&xenwatch_mutex);
    ...

In case a watch is firing before xenwatch_pid could be set and the
callback of that watch unregisters a watch, then a self-deadlock would
occur.

Avoid this by setting xenwatch_pid in xenwatch_thread().

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_xs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index e46080214955..3e59590c7254 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -857,6 +857,8 @@ static int xenwatch_thread(void *unused)
 	struct list_head *ent;
 	struct xs_watch_event *event;
 
+	xenwatch_pid = current->pid;
+
 	for (;;) {
 		wait_event_interruptible(watch_events_waitq,
 					 !list_empty(&watch_events));
@@ -925,7 +927,6 @@ int xs_init(void)
 	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	xenwatch_pid = task->pid;
 
 	/* shutdown watches for kexec boot */
 	xs_reset_watches();

From 020db9d3c1dc0aab9ab1252f4a36b6d8456b8794 Mon Sep 17 00:00:00 2001
From: Liu Shuo <shuo.a.liu@intel.com>
Date: Sun, 30 Jul 2017 00:59:57 +0800
Subject: [PATCH 275/281] xen/events: Fix interrupt lost during irq_disable and
 irq_enable

Here is a device has xen-pirq-MSI interrupt. Dom0 might lost interrupt
during driver irq_disable/irq_enable. Here is the scenario,
 1. irq_disable -> disable_dynirq -> mask_evtchn(irq channel)
 2. dev interrupt raised by HW and Xen mark its evtchn as pending
 3. irq_enable -> startup_pirq -> eoi_pirq ->
    clear_evtchn(channel of irq) -> clear pending status
 4. consume_one_event process the irq event without pending bit assert
    which result in interrupt lost once
 5. No HW interrupt raising anymore.

Now use enable_dynirq for enable_pirq of xen_pirq_chip to remove
eoi_pirq when irq_enable.

Signed-off-by: Liu Shuo <shuo.a.liu@intel.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/events/events_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index bae1f5d36c26..2d43118077e4 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -574,7 +574,7 @@ static void shutdown_pirq(struct irq_data *data)
 
 static void enable_pirq(struct irq_data *data)
 {
-	startup_pirq(data);
+	enable_dynirq(data);
 }
 
 static void disable_pirq(struct irq_data *data)

From a7990c647b35415e3dd07a077480a908678947ba Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 8 Aug 2017 12:26:02 +0200
Subject: [PATCH 276/281] iommu/arm-smmu: fix null-pointer dereference in
 arm_smmu_add_device

Commit c54451a "iommu/arm-smmu: Fix the error path in arm_smmu_add_device"
removed fwspec assignment in legacy_binding path as redundant which is
wrong. It needs to be updated after fwspec initialisation in
arm_smmu_register_legacy_master() as it is dereferenced later. Without
this there is a NULL-pointer dereference panic during boot on some hosts.

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b97188acc4f1..2d80fa8a0634 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1519,6 +1519,13 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (using_legacy_binding) {
 		ret = arm_smmu_register_legacy_master(dev, &smmu);
+
+		/*
+		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
+		 * will allocate/initialise a new one. Thus we need to update fwspec for
+		 * later use.
+		 */
+		fwspec = dev->iommu_fwspec;
 		if (ret)
 			goto out_free;
 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {

From 8a9d6e964d318533ba3d2901ce153ba317c99a89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 5 Aug 2017 10:59:14 +0200
Subject: [PATCH 277/281] pnfs/blocklayout: require 64-bit sector_t

The blocklayout code does not compile cleanly for a 32-bit sector_t,
and also has no reliable checks for devices sizes, which makes it
unsafe to use with a kernel that doesn't support large block devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing")
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 69d02cf8cf37..5f93cfacb3d1 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT
 config PNFS_BLOCK
 	tristate
 	depends on NFS_V4_1 && BLK_DEV_DM
+	depends on 64BIT || LBDAF
 	default NFS_V4
 
 config PNFS_FLEXFILE_LAYOUT

From 9a51544774a57fcb94994a61860a17f9e63a8d7b Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 2 Aug 2017 18:03:05 +0530
Subject: [PATCH 278/281] mtd: blkdevs: Fix mtd block write failure

All the MTD block write requests are failing with
following error messages

    mkfs.ext4  /dev/mtdblock0

    print_req_error: I/O error, dev mtdblock0, sector 0
    Buffer I/O error on dev mtdblock0, logical block 0,
    lost async page write

The control is going to default case after block write request
because of missing return.

Fixes: commit 2a842acab109 ("block: introduce new block status code type")
Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/mtd_blkdevs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index f336a9b85576..9ec8f033ac5f 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -113,6 +113,7 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->writesect(dev, block, buf))
 				return BLK_STS_IOERR;
+		return BLK_STS_OK;
 	default:
 		return BLK_STS_IOERR;
 	}

From ef954844c7ace62f773f4f23e28d2d915adc419f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 13 Aug 2017 16:01:32 -0700
Subject: [PATCH 279/281] Linux 4.13-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6eba23bcb5ad..90da7bdc3f45 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

From 4e2fcac773902eefdeae81637ca73bbf0398d11f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 8 Aug 2017 09:06:18 -0400
Subject: [PATCH 280/281] NFSv4: Use correct inode in
 _nfs4_opendata_to_nfs4_state()

When doing open by filehandle we don't really want to lookup a new inode,
but rather update the one we've got. Add a helper which does this for us.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d90132642340..5f11caefd36d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1659,6 +1659,28 @@ update:
 	return state;
 }
 
+static struct inode *
+nfs4_opendata_get_inode(struct nfs4_opendata *data)
+{
+	struct inode *inode;
+
+	switch (data->o_arg.claim) {
+	case NFS4_OPEN_CLAIM_NULL:
+	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+		if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+			return ERR_PTR(-EAGAIN);
+		inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh,
+				&data->f_attr, data->f_label);
+		break;
+	default:
+		inode = d_inode(data->dentry);
+		ihold(inode);
+		nfs_refresh_inode(inode, &data->f_attr);
+	}
+	return inode;
+}
+
 static struct nfs4_state *
 _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
@@ -1672,10 +1694,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 		goto out;
 	}
 
-	ret = -EAGAIN;
-	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
-		goto err;
-	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label);
+	inode = nfs4_opendata_get_inode(data);
 	ret = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto err;
@@ -2071,7 +2090,6 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 		data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
 	case NFS4_OPEN_CLAIM_FH:
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
-		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
 	}
 	data->timestamp = jiffies;
 	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,

From 75e8c48b9ef30bfda38e7f6e5d807352fbf0b090 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 8 Aug 2017 10:38:07 -0400
Subject: [PATCH 281/281] NFSv4: Use the nfs4_state being recovered in
 _nfs4_opendata_to_nfs4_state()

If we're recovering a nfs4_state, then we should try to use that instead
of looking up a new stateid. Only do that if the inodes match, though.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5f11caefd36d..3923ac71a420 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1681,12 +1681,30 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data)
 	return inode;
 }
 
+static struct nfs4_state *
+nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data)
+{
+	struct nfs4_state *state;
+	struct inode *inode;
+
+	inode = nfs4_opendata_get_inode(data);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	if (data->state != NULL && data->state->inode == inode) {
+		state = data->state;
+		atomic_inc(&state->count);
+	} else
+		state = nfs4_get_open_state(inode, data->owner);
+	iput(inode);
+	if (state == NULL)
+		state = ERR_PTR(-ENOMEM);
+	return state;
+}
+
 static struct nfs4_state *
 _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
-	struct inode *inode;
-	struct nfs4_state *state = NULL;
-	int ret;
+	struct nfs4_state *state;
 
 	if (!data->rpc_done) {
 		state = nfs4_try_open_cached(data);
@@ -1694,26 +1712,17 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 		goto out;
 	}
 
-	inode = nfs4_opendata_get_inode(data);
-	ret = PTR_ERR(inode);
-	if (IS_ERR(inode))
-		goto err;
-	ret = -ENOMEM;
-	state = nfs4_get_open_state(inode, data->owner);
-	if (state == NULL)
-		goto err_put_inode;
+	state = nfs4_opendata_find_nfs4_state(data);
+	if (IS_ERR(state))
+		goto out;
+
 	if (data->o_res.delegation_type != 0)
 		nfs4_opendata_check_deleg(data, state);
 	update_open_stateid(state, &data->o_res.stateid, NULL,
 			data->o_arg.fmode);
-	iput(inode);
 out:
 	nfs_release_seqid(data->o_arg.seqid);
 	return state;
-err_put_inode:
-	iput(inode);
-err:
-	return ERR_PTR(ret);
 }
 
 static struct nfs4_state *