From 6219f432ec037317a77c40910da12a626c34af1c Mon Sep 17 00:00:00 2001 From: Bhuvanchandra DV Date: Mon, 19 Oct 2015 21:24:36 +0530 Subject: [PATCH 001/291] vf610_adc: Fix internal temperature calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calculate ADCR_VTEMP25 using VTEMP25 at VREFH_ADC 3V3. Existing calculations consider the typical values provided in datasheet. Those typical values are valid for VREFH_ADC at 3.0V. VTEMP25 is different for different VREFH_ADC voltages. With VREFH_ADC at 3.3V, voltage at 25°C is 0.699V. Hence update the VTEMP25 to 0.699V which gives ADCR@Temp25 as 867. Formula for finding ADCR@Temp25: ADCR@Temp25 = (ADCR@Vdd * V@TEMP25 * 10) / VDDconv ADCR@Vdd for 12-Bit ADC = 4095 VDDconv = VREFH_ADC * 10 VREFH_ADC@3.3V ADCR@Temp25 = (4095 * .699 * 10) / 33 ADCR@Temp25 ~= 867 | VREFH_ADC | V@TEMP25 | VDDconv | ADCR@Temp25 | | 3.0V | 0.696mV | 30 | 950 | | 3.3V | 0.699mV | 33 | 867 | Signed-off-by: Bhuvanchandra DV Acked-by: Fugang Duan Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/vf610_adc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 6bf4c20eb231..42d7c87ea5be 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -103,6 +103,13 @@ #define DEFAULT_SAMPLE_TIME 1000 +/* V at 25°C of 696 mV */ +#define VF610_VTEMP25_3V0 950 +/* V at 25°C of 699 mV */ +#define VF610_VTEMP25_3V3 867 +/* Typical sensor slope coefficient at all temperatures */ +#define VF610_TEMP_SLOPE_COEFF 1840 + enum clk_sel { VF610_ADCIOC_BUSCLK_SET, VF610_ADCIOC_ALTCLK_SET, @@ -635,11 +642,13 @@ static int vf610_read_raw(struct iio_dev *indio_dev, break; case IIO_TEMP: /* - * Calculate in degree Celsius times 1000 - * Using sensor slope of 1.84 mV/°C and - * V at 25°C of 696 mV - */ - *val = 25000 - ((int)info->value - 864) * 1000000 / 1840; + * Calculate in degree Celsius times 1000 + * Using the typical sensor slope of 1.84 mV/°C + * and VREFH_ADC at 3.3V, V at 25°C of 699 mV + */ + *val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) * + 1000000 / VF610_TEMP_SLOPE_COEFF; + break; default: mutex_unlock(&indio_dev->mlock); From aea545fa90812cf68b3bf1f09715c465ba942ab3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 17 Oct 2015 00:29:55 +0200 Subject: [PATCH 002/291] staging: iio: select IRQ_WORK for IIO_DUMMY_EVGEN The iio dummy code was recently changed to use irq_work_queue, but that code is compiled into the kernel only if IRQ_WORK is set, so we can get a link error here: drivers/built-in.o: In function `iio_evgen_poke': (.text+0x208a04): undefined reference to `irq_work_queue' This changes the Kconfig file to match what other drivers do. Signed-off-by: Arnd Bergmann Fixes: fd2bb310ca3d ("Staging: iio: Move evgen interrupt generation to irq_work") Acked-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- drivers/staging/iio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/iio/Kconfig b/drivers/staging/iio/Kconfig index 6d5b38d69578..9d7f0004d2d7 100644 --- a/drivers/staging/iio/Kconfig +++ b/drivers/staging/iio/Kconfig @@ -18,7 +18,8 @@ source "drivers/staging/iio/resolver/Kconfig" source "drivers/staging/iio/trigger/Kconfig" config IIO_DUMMY_EVGEN - tristate + tristate + select IRQ_WORK config IIO_SIMPLE_DUMMY tristate "An example driver with no hardware requirements" From 01bb70ae0b98d266fa3e860482c7ce22fa482a6e Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Sat, 17 Oct 2015 21:44:38 +0300 Subject: [PATCH 003/291] iio: lpc32xx_adc: fix warnings caused by enabling unprepared clock If common clock framework is configured, the driver generates a warning, which is fixed by this change: root@devkit3250:~# cat /sys/bus/iio/devices/iio\:device0/in_voltage0_raw ------------[ cut here ]------------ WARNING: CPU: 0 PID: 724 at drivers/clk/clk.c:727 clk_core_enable+0x2c/0xa4() Modules linked in: sc16is7xx snd_soc_uda1380 CPU: 0 PID: 724 Comm: cat Not tainted 4.3.0-rc2+ #198 Hardware name: LPC32XX SoC (Flattened Device Tree) Backtrace: [<>] (dump_backtrace) from [<>] (show_stack+0x18/0x1c) [<>] (show_stack) from [<>] (dump_stack+0x20/0x28) [<>] (dump_stack) from [<>] (warn_slowpath_common+0x90/0xb8) [<>] (warn_slowpath_common) from [<>] (warn_slowpath_null+0x24/0x2c) [<>] (warn_slowpath_null) from [<>] (clk_core_enable+0x2c/0xa4) [<>] (clk_core_enable) from [<>] (clk_enable+0x24/0x38) [<>] (clk_enable) from [<>] (lpc32xx_read_raw+0x38/0x80) [<>] (lpc32xx_read_raw) from [<>] (iio_read_channel_info+0x70/0x94) [<>] (iio_read_channel_info) from [<>] (dev_attr_show+0x28/0x4c) [<>] (dev_attr_show) from [<>] (sysfs_kf_seq_show+0x8c/0xf0) [<>] (sysfs_kf_seq_show) from [<>] (kernfs_seq_show+0x2c/0x30) [<>] (kernfs_seq_show) from [<>] (seq_read+0x1c8/0x440) [<>] (seq_read) from [<>] (kernfs_fop_read+0x38/0x170) [<>] (kernfs_fop_read) from [<>] (do_readv_writev+0x16c/0x238) [<>] (do_readv_writev) from [<>] (vfs_readv+0x50/0x58) [<>] (vfs_readv) from [<>] (default_file_splice_read+0x1a4/0x308) [<>] (default_file_splice_read) from [<>] (do_splice_to+0x78/0x84) [<>] (do_splice_to) from [<>] (splice_direct_to_actor+0xc8/0x1cc) [<>] (splice_direct_to_actor) from [<>] (do_splice_direct+0xa0/0xb8) [<>] (do_splice_direct) from [<>] (do_sendfile+0x1a8/0x30c) [<>] (do_sendfile) from [<>] (SyS_sendfile64+0x104/0x10c) [<>] (SyS_sendfile64) from [<>] (ret_fast_syscall+0x0/0x38) Signed-off-by: Vladimir Zapolskiy Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/lpc32xx_adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/iio/adc/lpc32xx_adc.c b/drivers/staging/iio/adc/lpc32xx_adc.c index 5331c442fcfc..ef6d0b0ccf55 100644 --- a/drivers/staging/iio/adc/lpc32xx_adc.c +++ b/drivers/staging/iio/adc/lpc32xx_adc.c @@ -76,7 +76,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev, if (mask == IIO_CHAN_INFO_RAW) { mutex_lock(&indio_dev->mlock); - clk_enable(info->clk); + clk_prepare_enable(info->clk); /* Measurement setup */ __raw_writel(AD_INTERNAL | (chan->address) | AD_REFp | AD_REFm, LPC32XX_ADC_SELECT(info->adc_base)); @@ -84,7 +84,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev, __raw_writel(AD_PDN_CTRL | AD_STROBE, LPC32XX_ADC_CTRL(info->adc_base)); wait_for_completion(&info->completion); /* set by ISR */ - clk_disable(info->clk); + clk_disable_unprepare(info->clk); *val = info->value; mutex_unlock(&indio_dev->mlock); From 03fe472ef33b7f31fbd11d300dbb3fdab9c00fd4 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 13 Oct 2015 18:15:37 +0200 Subject: [PATCH 004/291] iio:ad5064: Make sure ad5064_i2c_write() returns 0 on success i2c_master_send() returns the number of bytes transferred on success while the ad5064 driver expects that the write() callback returns 0 on success. Fix that by translating any non negative return value of i2c_master_send() to 0. Fixes: commit 6a17a0768f77 ("iio:dac:ad5064: Add support for the ad5629r and ad5669r") Signed-off-by: Michael Hennerich Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5064.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index c067e6821496..7e7ebf30e954 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -598,10 +598,16 @@ static int ad5064_i2c_write(struct ad5064_state *st, unsigned int cmd, unsigned int addr, unsigned int val) { struct i2c_client *i2c = to_i2c_client(st->dev); + int ret; st->data.i2c[0] = (cmd << 4) | addr; put_unaligned_be16(val, &st->data.i2c[1]); - return i2c_master_send(i2c, st->data.i2c, 3); + + ret = i2c_master_send(i2c, st->data.i2c, 3); + if (ret < 0) + return ret; + + return 0; } static int ad5064_i2c_probe(struct i2c_client *i2c, From 5dcbe97bedd6ba4b0f574a96cc2e293d26f3d857 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 13 Oct 2015 18:15:38 +0200 Subject: [PATCH 005/291] iio: ad5064: Fix ad5629/ad5669 shift The ad5629/ad5669 are the I2C variant of the ad5628/ad5668, which has a SPI interface. They are mostly identical with the exception that the shift factor is different. Currently the driver does not take care of this difference which leads to incorrect DAC output values. Fix this by introducing a custom channel spec for the ad5629/ad5669 with the correct shift factor. Fixes: commit 6a17a0768f77 ("iio:dac:ad5064: Add support for the ad5629r and ad5669r") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5064.c | 83 +++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index 7e7ebf30e954..978f130ca678 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -113,12 +113,16 @@ enum ad5064_type { ID_AD5065, ID_AD5628_1, ID_AD5628_2, + ID_AD5629_1, + ID_AD5629_2, ID_AD5648_1, ID_AD5648_2, ID_AD5666_1, ID_AD5666_2, ID_AD5668_1, ID_AD5668_2, + ID_AD5669_1, + ID_AD5669_2, }; static int ad5064_write(struct ad5064_state *st, unsigned int cmd, @@ -291,7 +295,7 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = { { }, }; -#define AD5064_CHANNEL(chan, addr, bits) { \ +#define AD5064_CHANNEL(chan, addr, bits, _shift) { \ .type = IIO_VOLTAGE, \ .indexed = 1, \ .output = 1, \ @@ -303,36 +307,39 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = { .sign = 'u', \ .realbits = (bits), \ .storagebits = 16, \ - .shift = 20 - bits, \ + .shift = (_shift), \ }, \ .ext_info = ad5064_ext_info, \ } -#define DECLARE_AD5064_CHANNELS(name, bits) \ +#define DECLARE_AD5064_CHANNELS(name, bits, shift) \ const struct iio_chan_spec name[] = { \ - AD5064_CHANNEL(0, 0, bits), \ - AD5064_CHANNEL(1, 1, bits), \ - AD5064_CHANNEL(2, 2, bits), \ - AD5064_CHANNEL(3, 3, bits), \ - AD5064_CHANNEL(4, 4, bits), \ - AD5064_CHANNEL(5, 5, bits), \ - AD5064_CHANNEL(6, 6, bits), \ - AD5064_CHANNEL(7, 7, bits), \ + AD5064_CHANNEL(0, 0, bits, shift), \ + AD5064_CHANNEL(1, 1, bits, shift), \ + AD5064_CHANNEL(2, 2, bits, shift), \ + AD5064_CHANNEL(3, 3, bits, shift), \ + AD5064_CHANNEL(4, 4, bits, shift), \ + AD5064_CHANNEL(5, 5, bits, shift), \ + AD5064_CHANNEL(6, 6, bits, shift), \ + AD5064_CHANNEL(7, 7, bits, shift), \ } -#define DECLARE_AD5065_CHANNELS(name, bits) \ +#define DECLARE_AD5065_CHANNELS(name, bits, shift) \ const struct iio_chan_spec name[] = { \ - AD5064_CHANNEL(0, 0, bits), \ - AD5064_CHANNEL(1, 3, bits), \ + AD5064_CHANNEL(0, 0, bits, shift), \ + AD5064_CHANNEL(1, 3, bits, shift), \ } -static DECLARE_AD5064_CHANNELS(ad5024_channels, 12); -static DECLARE_AD5064_CHANNELS(ad5044_channels, 14); -static DECLARE_AD5064_CHANNELS(ad5064_channels, 16); +static DECLARE_AD5064_CHANNELS(ad5024_channels, 12, 8); +static DECLARE_AD5064_CHANNELS(ad5044_channels, 14, 6); +static DECLARE_AD5064_CHANNELS(ad5064_channels, 16, 4); -static DECLARE_AD5065_CHANNELS(ad5025_channels, 12); -static DECLARE_AD5065_CHANNELS(ad5045_channels, 14); -static DECLARE_AD5065_CHANNELS(ad5065_channels, 16); +static DECLARE_AD5065_CHANNELS(ad5025_channels, 12, 8); +static DECLARE_AD5065_CHANNELS(ad5045_channels, 14, 6); +static DECLARE_AD5065_CHANNELS(ad5065_channels, 16, 4); + +static DECLARE_AD5064_CHANNELS(ad5629_channels, 12, 4); +static DECLARE_AD5064_CHANNELS(ad5669_channels, 16, 0); static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { [ID_AD5024] = { @@ -382,6 +389,18 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { .channels = ad5024_channels, .num_channels = 8, }, + [ID_AD5629_1] = { + .shared_vref = true, + .internal_vref = 2500000, + .channels = ad5629_channels, + .num_channels = 8, + }, + [ID_AD5629_2] = { + .shared_vref = true, + .internal_vref = 5000000, + .channels = ad5629_channels, + .num_channels = 8, + }, [ID_AD5648_1] = { .shared_vref = true, .internal_vref = 2500000, @@ -418,6 +437,18 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = { .channels = ad5064_channels, .num_channels = 8, }, + [ID_AD5669_1] = { + .shared_vref = true, + .internal_vref = 2500000, + .channels = ad5669_channels, + .num_channels = 8, + }, + [ID_AD5669_2] = { + .shared_vref = true, + .internal_vref = 5000000, + .channels = ad5669_channels, + .num_channels = 8, + }, }; static inline unsigned int ad5064_num_vref(struct ad5064_state *st) @@ -623,12 +654,12 @@ static int ad5064_i2c_remove(struct i2c_client *i2c) } static const struct i2c_device_id ad5064_i2c_ids[] = { - {"ad5629-1", ID_AD5628_1}, - {"ad5629-2", ID_AD5628_2}, - {"ad5629-3", ID_AD5628_2}, /* similar enough to ad5629-2 */ - {"ad5669-1", ID_AD5668_1}, - {"ad5669-2", ID_AD5668_2}, - {"ad5669-3", ID_AD5668_2}, /* similar enough to ad5669-2 */ + {"ad5629-1", ID_AD5629_1}, + {"ad5629-2", ID_AD5629_2}, + {"ad5629-3", ID_AD5629_2}, /* similar enough to ad5629-2 */ + {"ad5669-1", ID_AD5669_1}, + {"ad5669-2", ID_AD5669_2}, + {"ad5669-3", ID_AD5669_2}, /* similar enough to ad5669-2 */ {} }; MODULE_DEVICE_TABLE(i2c, ad5064_i2c_ids); From 785171fd6cd7dcd7ada5a733b6a2d44ec566c3a0 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 12 Oct 2015 14:56:28 +0200 Subject: [PATCH 006/291] iio:ad7793: Fix ad7785 product ID While the datasheet for the AD7785 lists 0xXB as the product ID the actual product ID is 0xX3. Fix the product ID otherwise the driver will reject the device due to non matching IDs. Fixes: e786cc26dcc5 ("staging:iio:ad7793: Implement stricter id checking") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7793.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index b84922a4b32e..710aee8377bb 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -101,7 +101,7 @@ #define AD7795_CH_AIN1M_AIN1M 8 /* AIN1(-) - AIN1(-) */ /* ID Register Bit Designations (AD7793_REG_ID) */ -#define AD7785_ID 0xB +#define AD7785_ID 0x3 #define AD7792_ID 0xA #define AD7793_ID 0xB #define AD7794_ID 0xF From 8546d2e5b15e454d2d492a1e9625732980e8eb26 Mon Sep 17 00:00:00 2001 From: Sanchayan Maity Date: Mon, 19 Oct 2015 13:13:52 +0530 Subject: [PATCH 007/291] iio: adc: vf610_adc: Fix division by zero error In case the fsl,adck-max-frequency property is not present in the device tree, a division by zero error results during the probe call on kernel boot (see below). This patch fixes it and also restores device tree compatibility in case kernels are booting with old device trees without this property specified. [ 1.063229] Division by zero in kernel. [ 1.067152] CPU: 0 PID: 1 Comm: swapper Not tainted 4.3.0-rc5-00212-gcc88cef #37 [ 1.074650] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) [ 1.081135] Backtrace: [ 1.083694] [<800134a4>] (dump_backtrace) from [<8001369c>] (show_stack+0x18/0x1c) [ 1.091340] r7:00000008 r6:8e0ae210 r5:00000000 r4:8e299800 [ 1.097146] [<80013684>] (show_stack) from [<80297b1c>] (dump_stack+0x24/0x28) [ 1.104483] [<80297af8>] (dump_stack) from [<80013608>] (__div0+0x1c/0x20) [ 1.111421] [<800135ec>] (__div0) from [<802968b4>] (Ldiv0+0x8/0x10) [ 1.117865] [<80424350>] (vf610_adc_probe) from [<803153b4>] (platform_drv_probe+0x4c/0xac) [ 1.126311] r10:00000000 r9:8076a5ec r8:00000000 r7:fffffdfb r6:807cc67c r5:8e0ae210 [ 1.134319] r4:807f6c54 [ 1.136915] [<80315368>] (platform_drv_probe) from [<803138bc>] (driver_probe_device+0x20c/0x2f8) [ 1.145882] r7:807cc67c r6:00000000 r5:8e0ae210 r4:807f6c54 [ 1.151657] [<803136b0>] (driver_probe_device) from [<80313a3c>] (__driver_attach+0x94/0x98) [ 1.160190] r9:8076a5ec r8:00000098 r7:00000000 r6:8e0ae244 r5:807cc67c r4:8e0ae210 [ 1.168112] [<803139a8>] (__driver_attach) from [<80311cb8>] (bus_for_each_dev+0x70/0xa4) [ 1.176383] r7:00000000 r6:803139a8 r5:807cc67c r4:00000000 [ 1.182159] [<80311c48>] (bus_for_each_dev) from [<80313318>] (driver_attach+0x24/0x28) [ 1.190260] r6:807bb568 r5:8e2a5b00 r4:807cc67c [ 1.194996] [<803132f4>] (driver_attach) from [<80312f50>] (bus_add_driver+0x1a4/0x21c) [ 1.203113] [<80312dac>] (bus_add_driver) from [<803142a8>] (driver_register+0x80/0x100) [ 1.211275] r7:8e2a7dc0 r6:807a8160 r5:80789e14 r4:807cc67c [ 1.217075] [<80314228>] (driver_register) from [<803152f8>] (__platform_driver_register+0x5c/0x64) [ 1.226216] r5:80789e14 r4:807a8160 [ 1.229877] [<8031529c>] (__platform_driver_register) from [<80789e30>] (vf610_adc_driver_init+0x1c/0x20) [ 1.239556] [<80789e14>] (vf610_adc_driver_init) from [<800095f8>] (do_one_initcall+0x94/0x1dc) [ 1.248365] [<80009564>] (do_one_initcall) from [<8076ae34>] (kernel_init_freeable+0x13c/0x1e0) [ 1.257155] r10:80794830 r9:8076a5ec r8:00000098 r7:807d5780 r6:807d5780 r5:00000006 [ 1.265153] r4:807a0ee8 [ 1.267753] [<8076acf8>] (kernel_init_freeable) from [<80590ef0>] (kernel_init+0x18/0xf0) [ 1.276021] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:80590ed8 [ 1.284015] r4:807d5780 [ 1.286615] [<80590ed8>] (kernel_init) from [<8000f878>] (ret_from_fork+0x14/0x3c) [ 1.294278] r5:80590ed8 r4:00000000 Signed-off-by: Sanchayan Maity Acked-by: Fugang Duan Acked-by: Stefan Agner Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/vf610_adc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 42d7c87ea5be..d937df4f7f2b 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -200,6 +200,8 @@ static inline void vf610_adc_calculate_rates(struct vf610_adc *info) adc_feature->clk_div = 8; } + adck_rate = ipg_rate / adc_feature->clk_div; + /* * Determine the long sample time adder value to be used based * on the default minimum sample time provided. @@ -224,7 +226,6 @@ static inline void vf610_adc_calculate_rates(struct vf610_adc *info) * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode * LSTAdder(Long Sample Time): 3, 5, 7, 9, 13, 17, 21, 25 ADCK cycles */ - adck_rate = ipg_rate / info->adc_feature.clk_div; for (i = 0; i < ARRAY_SIZE(vf610_hw_avgs); i++) info->sample_freq_avail[i] = adck_rate / (6 + vf610_hw_avgs[i] * From 46561c3959d6307d22139c24cd0bf196162e5681 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 11 Nov 2015 10:19:31 -0800 Subject: [PATCH 008/291] x86/mpx: Do proper get_user() when running 32-bit binaries on 64-bit kernels When you call get_user(foo, bar), you effectively do a copy_from_user(&foo, bar, sizeof(*bar)); Note that the sizeof() is implicit. When we reach out to userspace to try to zap an entire "bounds table" we need to go read a "bounds directory entry" in order to locate the table's address. The size of a "directory entry" depends on the binary being run and is always the size of a pointer. But, when we have a 64-bit kernel and a 32-bit application, the directory entry is still only 32-bits long, but we fetch it with a 64-bit pointer which makes get_user() does a 64-bit fetch. Reading 4 extra bytes isn't harmful, unless we are at the end of and run off the table. It might also cause the zero page to get faulted in unnecessarily even if you are not at the end. Fix it up by doing a special 32-bit get_user() via a cast when we have 32-bit userspace. Signed-off-by: Dave Hansen Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151111181931.3ACF6822@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index b0ae85f90f10..6127c5e3a19b 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -585,6 +585,29 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, return bt_addr; } +/* + * We only want to do a 4-byte get_user() on 32-bit. Otherwise, + * we might run off the end of the bounds table if we are on + * a 64-bit kernel and try to get 8 bytes. + */ +int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, + long __user *bd_entry_ptr) +{ + u32 bd_entry_32; + int ret; + + if (is_64bit_mm(mm)) + return get_user(*bd_entry_ret, bd_entry_ptr); + + /* + * Note that get_user() uses the type of the *pointer* to + * establish the size of the get, not the destination. + */ + ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr); + *bd_entry_ret = bd_entry_32; + return ret; +} + /* * Get the base of bounds tables pointed by specific bounds * directory entry. @@ -605,7 +628,7 @@ static int get_bt_addr(struct mm_struct *mm, int need_write = 0; pagefault_disable(); - ret = get_user(bd_entry, bd_entry_ptr); + ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr); pagefault_enable(); if (!ret) break; From f3119b830264d89d216bfb378ab65065dffa02d9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 11 Nov 2015 10:19:34 -0800 Subject: [PATCH 009/291] x86/mpx: Fix 32-bit address space calculation I received a bug report that running 32-bit MPX binaries on 64-bit kernels was broken. I traced it down to this little code snippet. We were switching our "number of bounds directory entries" calculation correctly. But, we didn't switch the other side of the calculation: the virtual space size. This meant that we were calculating an absurd size for bd_entry_virt_space() on 32-bit because we used the 64-bit virt_space. This was _also_ broken for 32-bit kernels running on 64-bit hardware since boot_cpu_data.x86_virt_bits=48 even when running in 32-bit mode. Correct that and properly handle all 3 possible cases: 1. 32-bit binary on 64-bit kernel 2. 64-bit binary on 64-bit kernel 3. 32-bit binary on 32-bit kernel This manifested in having bounds tables not properly unmapped. It "leaked" memory but had no functional impact otherwise. Signed-off-by: Dave Hansen Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151111181934.FA7FAC34@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 6127c5e3a19b..1202d5ca2fb5 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -723,11 +723,23 @@ static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, */ static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) { - unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); - if (is_64bit_mm(mm)) - return virt_space / MPX_BD_NR_ENTRIES_64; - else - return virt_space / MPX_BD_NR_ENTRIES_32; + unsigned long long virt_space; + unsigned long long GB = (1ULL << 30); + + /* + * This covers 32-bit emulation as well as 32-bit kernels + * running on 64-bit harware. + */ + if (!is_64bit_mm(mm)) + return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; + + /* + * 'x86_virt_bits' returns what the hardware is capable + * of, and returns the full >32-bit adddress space when + * running 32-bit kernels on 64-bit hardware. + */ + virt_space = (1ULL << boot_cpu_data.x86_virt_bits); + return virt_space / MPX_BD_NR_ENTRIES_64; } /* From ab6b52947545a5355154f64f449f97af9d05845f Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 10 Nov 2015 16:23:54 -0800 Subject: [PATCH 010/291] x86/fpu: Fix 32-bit signal frame handling (This should have gone to LKML originally. Sorry for the extra noise, folks on the cc.) Background: Signal frames on x86 have two formats: 1. For 32-bit executables (whether on a real 32-bit kernel or under 32-bit emulation on a 64-bit kernel) we have a 'fpregset_t' that includes the "FSAVE" registers. 2. For 64-bit executables (on 64-bit kernels obviously), the 'fpregset_t' is smaller and does not contain the "FSAVE" state. When creating the signal frame, we have to be aware of whether we are running a 32 or 64-bit executable so we create the correct format signal frame. Problem: save_xstate_epilog() uses 'fx_sw_reserved_ia32' whenever it is called for a 32-bit executable. This is for real 32-bit and ia32 emulation. But, fpu__init_prepare_fx_sw_frame() only initializes 'fx_sw_reserved_ia32' when emulation is enabled, *NOT* for real 32-bit kernels. This leads to really wierd situations where 32-bit programs lose their extended state when returning from a signal handler. The kernel copies the uninitialized (zero) 'fx_sw_reserved_ia32' out to userspace in save_xstate_epilog(). But when returning from the signal, the kernel errors out in check_for_xstate() when it does not see FP_XSTATE_MAGIC1 present (because it was zeroed). This leads to the FPU/XSAVE state being initialized. For MPX, this leads to the most permissive state and means we silently lose bounds violations. I think this would also mean that we could lose *ANY* FPU/SSE/AVX state. I'm not sure why no one has spotted this bug. I believe this was broken by: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels") way back in 2012. Signed-off-by: Dave Hansen Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@sr71.net Cc: fenghua.yu@intel.com Cc: yu-cheng.yu@intel.com Link: http://lkml.kernel.org/r/20151111002354.A0799571@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ef29b742cea7..31c6a60505e6 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -385,20 +385,19 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int fsave_header_size = sizeof(struct fregs_state); int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; fx_sw_reserved.xstate_size = xstate_size; - if (config_enabled(CONFIG_IA32_EMULATION)) { + if (config_enabled(CONFIG_IA32_EMULATION) || + config_enabled(CONFIG_X86_32)) { + int fsave_header_size = sizeof(struct fregs_state); + fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; + fx_sw_reserved_ia32.extended_size = size + fsave_header_size; } } From a05917b6ba9dc9a95fc42bdcbe3a875e8ad83935 Mon Sep 17 00:00:00 2001 From: Huaitong Han Date: Fri, 6 Nov 2015 17:00:23 +0800 Subject: [PATCH 011/291] x86/fpu: Fix get_xsave_addr() behavior under virtualization KVM uses the get_xsave_addr() function in a different fashion from the native kernel, in that the 'xsave' parameter belongs to guest vcpu, not the currently running task. But 'xsave' is replaced with current task's (host) xsave structure, so get_xsave_addr() will incorrectly return the bad xsave address to KVM. Fix it so that the passed in 'xsave' address is used - as intended originally. Signed-off-by: Huaitong Han Reviewed-by: Dave Hansen Cc: Cc: Andy Lutomirski Cc: Paolo Bonzini Cc: Borislav Petkov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: dave.hansen@intel.com Link: http://lkml.kernel.org/r/1446800423-21622-1-git-send-email-huaitong.han@intel.com [ Tidied up the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6454f2731b56..70fc312221fc 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -694,7 +694,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) if (!boot_cpu_has(X86_FEATURE_XSAVE)) return NULL; - xsave = ¤t->thread.fpu.state.xsave; /* * We should not ever be requesting features that we * have not enabled. Remember that pcntxt_mask is From 836d525baada26e1a7462e87805337b9a9b729f6 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Wed, 11 Nov 2015 14:25:33 -0800 Subject: [PATCH 012/291] tools: Actually install tmon in the install rule Signed-off-by: Kamal Mostafa Cc: Jiri Olsa Cc: Pali Rohar Cc: Roberta Dobrescu Link: http://lkml.kernel.org/r/1447280736-2161-1-git-send-email-kamal@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Makefile b/tools/Makefile index d6f307dfb1a3..fa9d0a614d97 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -101,7 +101,7 @@ freefall_install: install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ - tmon freefall_install + tmon_install freefall_install acpi_clean: $(call descend,power/acpi,clean) From f6ba98c5dc78708cb7fd29950c4a50c4c7e88f95 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Wed, 11 Nov 2015 14:25:34 -0800 Subject: [PATCH 013/291] tools: Add a "make all" rule Signed-off-by: Kamal Mostafa Acked-by: Pavel Machek Cc: Jiri Olsa Cc: Jonathan Cameron Cc: Pali Rohar Cc: Roberta Dobrescu Link: http://lkml.kernel.org/r/1447280736-2161-2-git-send-email-kamal@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/Makefile b/tools/Makefile index fa9d0a614d97..7dc820a8c1f1 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -32,6 +32,10 @@ help: @echo ' from the kernel command line to build and install one of' @echo ' the tools above' @echo '' + @echo ' $$ make tools/all' + @echo '' + @echo ' builds all tools.' + @echo '' @echo ' $$ make tools/install' @echo '' @echo ' installs all tools.' @@ -77,6 +81,11 @@ tmon: FORCE freefall: FORCE $(call descend,laptop/$@) +all: acpi cgroup cpupower hv firewire lguest \ + perf selftests turbostat usb \ + virtio vm net x86_energy_perf_policy \ + tmon freefall + acpi_install: $(call descend,power/$(@:_install=),install) From 866548dd6e22c3795ae5146a9746a5cf659698f1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 Nov 2015 15:59:29 +0200 Subject: [PATCH 014/291] perf symbols: Rebuild rbtree when adjusting symbols for kcore Normally symbols are read from the DSO and adjusted, if need be, so that the symbol start matches the file offset in the DSO file (we want the file offset because that is what we know from MMAP events). That is done by dso__load_sym() which inserts the symbols *after* adjusting them. In the case of kcore, the symbols have been read from kallsyms and the symbol start is the memory address. The symbols have to be adjusted to match the kcore file offsets. dso__split_kallsyms_for_kcore() does that, but now the adjustment is being done *after* the symbols have been inserted. It appears dso__split_kallsyms_for_kcore() was assuming that changing the symbol start would not change the order in the rbtree - which is, of course, not guaranteed. Signed-off-by: Adrian Hunter Tested-by: Wang Nan Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/563CB241.2090701@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b4cc7662677e..09343a880c0b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -654,19 +654,24 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0; + struct rb_root old_root = dso->symbols[map->type]; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); if (!kmaps) return -1; + *root = RB_ROOT; + while (next) { char *module; pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); + rb_erase_init(&pos->rb_node, &old_root); + module = strchr(pos->name, '\t'); if (module) *module = '\0'; @@ -674,28 +679,21 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, curr_map = map_groups__find(kmaps, map->type, pos->start); if (!curr_map || (filter && filter(curr_map, pos))) { - rb_erase_init(&pos->rb_node, root); symbol__delete(pos); - } else { - pos->start -= curr_map->start - curr_map->pgoff; - if (pos->end) - pos->end -= curr_map->start - curr_map->pgoff; - if (curr_map->dso != map->dso) { - rb_erase_init(&pos->rb_node, root); - symbols__insert( - &curr_map->dso->symbols[curr_map->type], - pos); - ++moved; - } else { - ++count; - } + continue; } + + pos->start -= curr_map->start - curr_map->pgoff; + if (pos->end) + pos->end -= curr_map->start - curr_map->pgoff; + symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + ++count; } /* Symbols have been adjusted */ dso->adjust_symbols = 1; - return count + moved; + return count; } /* From fae00650f7910da7064f45d49c95df88fab816f6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Nov 2015 15:59:26 -0300 Subject: [PATCH 015/291] perf hists browser: Fix 'd' hotkey action to filter by DSO When pressing 'd' the expected action is to filter all entries by the DSO in the current entry, but for that the action->map needs to be set, and only action->dso was being set, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 045b80dd0340 ("perf hists browser: Use the map to determine if a DSO is being used as a kernel") Link: http://lkml.kernel.org/n/tip-xqhfzgoblq49lk5h5u82atro@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e5afb8936040..a7ce0cf117e7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1875,6 +1875,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; case 'd': actions->dso = dso; + actions->ms.map = map; do_zoom_dso(browser, actions); continue; case 'V': From b7f294b58adb02d928dccb04bd39cb789db09e1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Nov 2015 16:01:56 -0300 Subject: [PATCH 016/291] perf hists browser: The dso can be obtained from popup_action->ms.map->dso So no need to have a 'dso' member in 'popup_action', remove it as no code is using it, already. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-76a6s0007slug0op0wkl6o8b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a7ce0cf117e7..fa9eb92c9e24 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1430,7 +1430,6 @@ close_file_and_continue: struct popup_action { struct thread *thread; - struct dso *dso; struct map_symbol ms; int socket; @@ -1565,7 +1564,6 @@ add_dso_opt(struct hist_browser *browser, struct popup_action *act, return 0; act->ms.map = map; - act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1827,7 +1825,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; - struct dso *dso = NULL; struct map *map = NULL; int choice = 0; int socked_id = -1; @@ -1839,8 +1836,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); map = browser->selection->map; - if (map) - dso = map->dso; socked_id = browser->he_selection->socket; } switch (key) { @@ -1874,7 +1869,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, hist_browser__dump(browser); continue; case 'd': - actions->dso = dso; actions->ms.map = map; do_zoom_dso(browser, actions); continue; From 2059fc7a5a9e667797b8ec503bfb4685afee48d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Nov 2015 16:50:13 -0300 Subject: [PATCH 017/291] perf symbols: Allow forcing reading of non-root owned files by root When the root user tries to read a file owned by some other user we get: # ls -la perf.data -rw-------. 1 acme acme 20032 Nov 12 15:50 perf.data # perf report File perf.data not owned by current user or root (use -f to override) # perf report -f | grep -v ^# | head -2 30.96% ls [kernel.vmlinux] [k] do_set_pte 28.24% ls libc-2.20.so [.] intel_check_word # That wasn't happening when the symbol code tried to read a JIT map, where the same check was done but no forcing was possible, fix it. Reported-by: Brendan Gregg Tested-by: Brendan Gregg Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://permalink.gmane.org/gmane.linux.kernel.perf.user/2380 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 6 +++--- tools/perf/util/symbol.c | 4 ++-- tools/perf/util/symbol.h | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2853ad2bd435..f256fac1e722 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -44,7 +44,7 @@ struct report { struct perf_tool tool; struct perf_session *session; - bool force, use_tui, use_gtk, use_stdio; + bool use_tui, use_gtk, use_stdio; bool hide_unresolved; bool dont_use_callchains; bool show_full_info; @@ -678,7 +678,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, @@ -832,7 +832,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) } file.path = input_name; - file.force = report.force; + file.force = symbol_conf.force; repeat: session = perf_session__new(&file, false, &report.tool); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 09343a880c0b..cd08027a6d2c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1436,9 +1436,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (lstat(dso->name, &st) < 0) goto out; - if (st.st_uid && (st.st_uid != geteuid())) { + if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) { pr_warning("File %s not owned by current user or root, " - "ignoring it.\n", dso->name); + "ignoring it (use -f to override).\n", dso->name); goto out; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 40073c60b83d..dcd786e364f2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -84,6 +84,7 @@ struct symbol_conf { unsigned short priv_size; unsigned short nr_events; bool try_vmlinux_path, + force, ignore_vmlinux, ignore_vmlinux_buildid, show_kernel_path, From e266a753bf51b2c3b46d0d230349662c35ac5629 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 13 Nov 2015 11:48:30 +0200 Subject: [PATCH 018/291] perf symbols: Fix dso lookup by long name and missing buildids Commit 4598a0a6d22f ("perf symbols: Improve DSO long names lookup speed with rbtree") Added a tree to lookup dsos by long name. That tree gets corrupted whenever a dso long name is changed because the tree is not updated. One effect of that is buildid-list does not work with the 'with-hits' option because dso lookup fails and results in two structs for the same dso. The first has the buildid but no hits, the second has hits but no buildid. e.g. Before: $ tools/perf/perf record ls arch certs CREDITS Documentation firmware include ipc Kconfig lib Makefile net REPORTING-BUGS scripts sound usr block COPYING crypto drivers fs init Kbuild kernel MAINTAINERS mm README samples security tools virt [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (11 samples) ] $ tools/perf/perf buildid-list 574da826c66538a8d9060d393a8866289bd06005 [kernel.kallsyms] 30c94dc66a1fe95180c3d68d2b89e576d5ae213c /lib/x86_64-linux-gnu/libc-2.19.so $ tools/perf/perf buildid-list -H 574da826c66538a8d9060d393a8866289bd06005 [kernel.kallsyms] 0000000000000000000000000000000000000000 /lib/x86_64-linux-gnu/libc-2.19.so After: $ tools/perf/perf buildid-list -H 574da826c66538a8d9060d393a8866289bd06005 [kernel.kallsyms] 30c94dc66a1fe95180c3d68d2b89e576d5ae213c /lib/x86_64-linux-gnu/libc-2.19.so The fix is to record the root of the tree on the dso so that dso__set_long_name() can update the tree when the long name changes. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Don Zickus Cc: Douglas Hatch Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Scott J Norton Cc: Waiman Long Fixes: 4598a0a6d22f ("perf symbols: Improve DSO long names lookup speed with rbtree") Link: http://lkml.kernel.org/r/1447408112-1920-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 17 +++++++++++++++++ tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 1 + 3 files changed, 19 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 7c0c08386a1d..425df5c86c9c 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -933,6 +933,7 @@ static struct dso *__dso__findlink_by_longname(struct rb_root *root, /* Add new node and rebalance tree */ rb_link_node(&dso->rb_node, parent, p); rb_insert_color(&dso->rb_node, root); + dso->root = root; } return NULL; } @@ -945,15 +946,30 @@ static inline struct dso *__dso__find_by_longname(struct rb_root *root, void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) { + struct rb_root *root = dso->root; + if (name == NULL) return; if (dso->long_name_allocated) free((char *)dso->long_name); + if (root) { + rb_erase(&dso->rb_node, root); + /* + * __dso__findlink_by_longname() isn't guaranteed to add it + * back, so a clean removal is required here. + */ + RB_CLEAR_NODE(&dso->rb_node); + dso->root = NULL; + } + dso->long_name = name; dso->long_name_len = strlen(name); dso->long_name_allocated = name_allocated; + + if (root) + __dso__findlink_by_longname(root, dso, NULL); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) @@ -1046,6 +1062,7 @@ struct dso *dso__new(const char *name) dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; RB_CLEAR_NODE(&dso->rb_node); + dso->root = NULL; INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index fc8db9c764ac..45ec4d0a50ed 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -135,6 +135,7 @@ struct dso { pthread_mutex_t lock; struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ + struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; struct { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5ef90be2a249..8b303ff20289 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -91,6 +91,7 @@ static void dsos__purge(struct dsos *dsos) list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); + pos->root = NULL; list_del_init(&pos->node); dso__put(pos); } From 1216b65c502e0f130cc9984dfd5f9e1968c1eb46 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 13 Nov 2015 11:48:31 +0200 Subject: [PATCH 019/291] perf buildid-list: Requires ordered events 'perf buildid-list' processes events to determine hits (i.e. with-hits option). That may not work if events are not sorted in order. i.e. MMAP events must be processed before the samples that depend on them so that sample processing can 'hit' the DSO to which the MMAP refers. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1447408112-1920-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index d909459fb54c..217b5a60e2ab 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -76,6 +76,7 @@ struct perf_tool build_id__mark_dso_hit_ops = { .exit = perf_event__exit_del_thread, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, + .ordered_events = true, }; int build_id__sprintf(const u8 *build_id, int len, char *bf) From d8145b3e30a24280c396d88c8703c50a1ea0aa3a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 13 Nov 2015 11:48:32 +0200 Subject: [PATCH 020/291] perf inject: Also re-pipe lost_samples event perf inject must re-pipe all events otherwise they get dropped from the output file. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1447408112-1920-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0a945d2e8ca5..99d127fe9c35 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -675,6 +675,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, + .lost_samples = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, .context_switch = perf_event__repipe, From 0196e787ceb58cdfea822482ec70019bc16cbd51 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 13 Nov 2015 12:29:10 +0000 Subject: [PATCH 021/291] perf probe: Fix memory leaking on failure by clearing all probe_trace_events Fix memory leaking on the debuginfo__find_trace_events() failure path which frees an array of probe_trace_events but doesn't clears all the allocated sub-structures and strings. So, before doing zfree(tevs), clear all the array elements which may have allocated resources. Reported-by: Wang Nan Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447417761-156094-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index bd8f03de5e40..63993d7e0fac 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1246,7 +1246,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct trace_event_finder tf = { .pf = {.pev = pev, .callback = add_probe_trace_event}, .max_tevs = probe_conf.max_probes, .mod = dbg->mod}; - int ret; + int ret, i; /* Allocate result tevs array */ *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs); @@ -1258,6 +1258,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, ret = debuginfo__find_probes(dbg, &tf.pf); if (ret < 0) { + for (i = 0; i < tf.ntevs; i++) + clear_probe_trace_event(&tf.tevs[i]); zfree(tevs); return ret; } From 092b1f0b5f9f797812da0de927c3aa26acbe8762 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 13 Nov 2015 12:29:11 +0000 Subject: [PATCH 022/291] perf probe: Clear probe_trace_event when add_probe_trace_event() fails When probing with a glob, errors in add_probe_trace_event() won't be passed to debuginfo__find_trace_events() because it would be modified by probe_point_search_cb(). It causes a segfault if perf fails to find an argument for a probe point matched by the glob. For example: # ./perf probe -v -n 'SyS_dup? oldfd' probe-definition(0): SyS_dup? oldfd symbol:SyS_dup? file:(null) line:0 offset:0 return:0 lazy:(null) parsing arg: oldfd into oldfd 1 arguments Looking at the vmlinux_path (7 entries long) Using /lib/modules/4.3.0-rc4+/build/vmlinux for symbols Open Debuginfo file: /lib/modules/4.3.0-rc4+/build/vmlinux Try to find probe point from debuginfo. Matched function: SyS_dup3 found inline addr: 0xffffffff812095c0 Probe point found: SyS_dup3+0 Searching 'oldfd' variable in context. Converting variable oldfd into trace event. oldfd type is long int. found inline addr: 0xffffffff812096d4 Probe point found: SyS_dup2+36 Searching 'oldfd' variable in context. Failed to find 'oldfd' in this function. Matched function: SyS_dup3 Probe point found: SyS_dup3+0 Searching 'oldfd' variable in context. Converting variable oldfd into trace event. oldfd type is long int. Matched function: SyS_dup2 Probe point found: SyS_dup2+0 Searching 'oldfd' variable in context. Converting variable oldfd into trace event. oldfd type is long int. Found 4 probe_trace_events. Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: p:probe/SyS_dup3 _text+2135488 oldfd=%di:s64 Segmentation fault (core dumped) # This patch ensures that add_probe_trace_event() doesn't touches tf->ntevs and tf->tevs if those functions fail. After the patch: # perf probe 'SyS_dup? oldfd' Failed to find 'oldfd' in this function. Added new events: probe:SyS_dup3 (on SyS_dup? with oldfd) probe:SyS_dup3_1 (on SyS_dup? with oldfd) probe:SyS_dup2 (on SyS_dup? with oldfd) You can now use it in all perf tools, such as: perf record -e probe:SyS_dup2 -aR sleep 1 Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Masami Hiramatsu Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1447417761-156094-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 63993d7e0fac..05012bb178d7 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1183,7 +1183,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) container_of(pf, struct trace_event_finder, pf); struct perf_probe_point *pp = &pf->pev->point; struct probe_trace_event *tev; - struct perf_probe_arg *args; + struct perf_probe_arg *args = NULL; int ret, i; /* Check number of tevs */ @@ -1198,19 +1198,23 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr, pp->retprobe, pp->function, &tev->point); if (ret < 0) - return ret; + goto end; tev->point.realname = strdup(dwarf_diename(sc_die)); - if (!tev->point.realname) - return -ENOMEM; + if (!tev->point.realname) { + ret = -ENOMEM; + goto end; + } pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); /* Expand special probe argument if exist */ args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS); - if (args == NULL) - return -ENOMEM; + if (args == NULL) { + ret = -ENOMEM; + goto end; + } ret = expand_probe_args(sc_die, pf, args); if (ret < 0) @@ -1234,6 +1238,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) } end: + if (ret) { + clear_probe_trace_event(tev); + tf->ntevs--; + } free(args); return ret; } From ec0adc539b8bf59b7c00db0748671f6594b77843 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Nov 2015 02:42:31 -0500 Subject: [PATCH 023/291] tools/power turbostat: use new name for MSR_PLATFORM_INFO MSR_PLATFORM_INFO is the new name for MSR_NHM_PLATFORM_INFO no functional change Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d8e4b20b6d54..0dac7e05a6ac 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1173,9 +1173,9 @@ dump_nhm_platform_info(void) unsigned long long msr; unsigned int ratio; - get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); - fprintf(stderr, "cpu%d: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); + fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", @@ -1807,7 +1807,7 @@ void check_permissions() * * MSR_SMI_COUNT 0x00000034 * - * MSR_NHM_PLATFORM_INFO 0x000000ce + * MSR_PLATFORM_INFO 0x000000ce * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 * * MSR_PKG_C3_RESIDENCY 0x000003f8 @@ -1876,7 +1876,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; - get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); base_ratio = (msr >> 8) & 0xFF; base_hz = base_ratio * bclk * 1000000; From 5369a21e3f26ef9d2bf6ea1b322d6899a4ed08e0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Nov 2015 02:42:32 -0500 Subject: [PATCH 024/291] x86: remove unused definition of MSR_NHM_PLATFORM_INFO MSR_NHM_PLATFORM_INFO has been replaced by... MSR_PLATFORM_INFO Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/msr-index.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b8c14bb7fc8f..705c4082b7f1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -35,7 +35,7 @@ #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd -#define MSR_NHM_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO 0x000000ce #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) @@ -44,7 +44,6 @@ #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) -#define MSR_PLATFORM_INFO 0x000000ce #define MSR_MTRRcap 0x000000fe #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e From d29d67357db8dad08f97162bbeb51e4975ee67fd Mon Sep 17 00:00:00 2001 From: Ashwin Chaugule Date: Thu, 12 Nov 2015 19:52:30 -0500 Subject: [PATCH 025/291] ACPI / CPPC: Use h/w reduced version of the PCCT structure CPPC is enabled only on platforms which support the h/w reduced ACPI specification, so use the h/w reduced version of the PCCT consistently when deferencing PCCT contents. Fixes: 337aadff8e45 (ACPI: Introduce CPU performance controls using CPPC) Signed-off-by: Ashwin Chaugule Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 0bbf84bdcdc4..9361966f1b61 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -304,7 +304,7 @@ EXPORT_SYMBOL_GPL(acpi_get_psd_map); static int register_pcc_channel(unsigned pcc_subspace_idx) { - struct acpi_pcct_subspace *cppc_ss; + struct acpi_pcct_hw_reduced *cppc_ss; unsigned int len; if (pcc_subspace_idx >= 0) { From 0d2f6fd3ffd5e7a447233a57112246bc00064752 Mon Sep 17 00:00:00 2001 From: Chris Lesiak Date: Mon, 2 Nov 2015 15:45:03 -0600 Subject: [PATCH 026/291] iio: si7020: Swap data byte order The Silicon Labs Si7013, Si7020, and Si7021 family of I2C humidity and temperature sensors deliver 16 bit data high byte first. See the datasheet available at: https://www.silabs.com/Support%20Documents%2fTechnicalDocs%2fSi7020-A20.pdf But as documented in Documentation/i2c/smbus-protocol, i2c_smbus_read_word_data() expects the low byte first. Change the driver to use i2c_smbus_read_word_swapped to get correct byte order. Signed-off-by: Chris Lesiak Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/si7020.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c index fa3b809aff5e..1b6935d99100 100644 --- a/drivers/iio/humidity/si7020.c +++ b/drivers/iio/humidity/si7020.c @@ -50,10 +50,10 @@ static int si7020_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - ret = i2c_smbus_read_word_data(*client, - chan->type == IIO_TEMP ? - SI7020CMD_TEMP_HOLD : - SI7020CMD_RH_HOLD); + ret = i2c_smbus_read_word_swapped(*client, + chan->type == IIO_TEMP ? + SI7020CMD_TEMP_HOLD : + SI7020CMD_RH_HOLD); if (ret < 0) return ret; *val = ret >> 2; From a57f8dac46fbac5ab0e31aef1a98b3f6eb30c079 Mon Sep 17 00:00:00 2001 From: Thomas Betker Date: Wed, 11 Nov 2015 21:24:38 +0100 Subject: [PATCH 027/291] iio: adc: xilinx: Fix VREFN scale The scaling factor for VREFN is 3.0/4096 (not 1.0/4096), just as for VREFP. This is not immediately obvious from the specification (Xilinx UG480), but has been confirmed by Xilinx support. Suggested-by: Hartmut Knaack Signed-off-by: Thomas Betker Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index ce93bd8e3f68..95c0b3d9890c 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -857,6 +857,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev, case XADC_REG_VCCINT: case XADC_REG_VCCAUX: case XADC_REG_VREFP: + case XADC_REG_VREFN: case XADC_REG_VCCBRAM: case XADC_REG_VCCPINT: case XADC_REG_VCCPAUX: From 0cfec916e86d881e209de4b4ae9959a6271e6660 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 5 Sep 2015 17:44:13 -0500 Subject: [PATCH 028/291] ipmi: Start the timer and thread on internal msgs The timer and thread were not being started for internal messages, so in interrupt mode if something hung the timer would never go off and clean things up. Factor out the internal message sending and start the timer for those messages, too. Signed-off-by: Corey Minyard Tested-by: Gouji, Masayuki Cc: stable@vger.kernel.org --- drivers/char/ipmi/ipmi_si_intf.c | 73 +++++++++++++++++++------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 654f6f36a071..20c3d7b97602 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -412,18 +412,42 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) return rv; } -static void start_check_enables(struct smi_info *smi_info) +static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) +{ + smi_info->last_timeout_jiffies = jiffies; + mod_timer(&smi_info->si_timer, new_val); + smi_info->timer_running = true; +} + +/* + * Start a new message and (re)start the timer and thread. + */ +static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, + unsigned int size) +{ + smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES); + + if (smi_info->thread) + wake_up_process(smi_info->thread); + + smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); +} + +static void start_check_enables(struct smi_info *smi_info, bool start_timer) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + if (start_timer) + start_new_msg(smi_info, msg, 2); + else + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info) +static void start_clear_flags(struct smi_info *smi_info, bool start_timer) { unsigned char msg[3]; @@ -432,7 +456,10 @@ static void start_clear_flags(struct smi_info *smi_info) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + if (start_timer) + start_new_msg(smi_info, msg, 3); + else + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -442,10 +469,8 @@ static void start_getting_msg_queue(struct smi_info *smi_info) smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD; smi_info->curr_msg->data_size = 2; - smi_info->handlers->start_transaction( - smi_info->si_sm, - smi_info->curr_msg->data, - smi_info->curr_msg->data_size); + start_new_msg(smi_info, smi_info->curr_msg->data, + smi_info->curr_msg->data_size); smi_info->si_state = SI_GETTING_MESSAGES; } @@ -455,20 +480,11 @@ static void start_getting_events(struct smi_info *smi_info) smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD; smi_info->curr_msg->data_size = 2; - smi_info->handlers->start_transaction( - smi_info->si_sm, - smi_info->curr_msg->data, - smi_info->curr_msg->data_size); + start_new_msg(smi_info, smi_info->curr_msg->data, + smi_info->curr_msg->data_size); smi_info->si_state = SI_GETTING_EVENTS; } -static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) -{ - smi_info->last_timeout_jiffies = jiffies; - mod_timer(&smi_info->si_timer, new_val); - smi_info->timer_running = true; -} - /* * When we have a situtaion where we run out of memory and cannot * allocate messages, we just leave them in the BMC and run the system @@ -478,11 +494,11 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. */ -static inline bool disable_si_irq(struct smi_info *smi_info) +static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info); + start_check_enables(smi_info, start_timer); return true; } return false; @@ -492,7 +508,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info); + start_check_enables(smi_info, true); return true; } return false; @@ -510,7 +526,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info)) + if (!disable_si_irq(smi_info, true)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -526,7 +542,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info); + start_clear_flags(smi_info, true); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -879,8 +895,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_MSG_FLAGS_CMD; - smi_info->handlers->start_transaction( - smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_GETTING_FLAGS; goto restart; } @@ -910,7 +925,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->irq) { - start_check_enables(smi_info); + start_check_enables(smi_info, true); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -3613,7 +3628,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi); + start_clear_flags(new_smi, false); /* * IRQ is defined to be set when non-zero. req_events will @@ -3908,7 +3923,7 @@ static void cleanup_one_si(struct smi_info *to_clean) poll(to_clean); schedule_timeout_uninterruptible(1); } - disable_si_irq(to_clean); + disable_si_irq(to_clean, false); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); From 314ef52fe67f8f03453b69169f954e2d04679bbd Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 5 Sep 2015 17:58:13 -0500 Subject: [PATCH 029/291] ipmi: Stop the timer immediately if idle The IPMI driver would let the final timeout just happen, but it could easily just stop the timer. If the timer stop fails that's ok, that should be rare. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 20c3d7b97602..71b59cb6ae0c 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -935,6 +935,13 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, } goto restart; } + + if (si_sm_result == SI_SM_IDLE && smi_info->timer_running) { + /* Ok it if fails, the timer will just go off. */ + if (del_timer(&smi_info->si_timer)) + smi_info->timer_running = false; + } + out: return si_sm_result; } From 66f44018300c5e6f53c9d30d6920332cf0e6a8f9 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Sat, 19 Sep 2015 16:43:23 +0100 Subject: [PATCH 030/291] char: ipmi: Move MODULE_DEVICE_TABLE() to follow struct The policy for drivers is to have MODULE_DEVICE_TABLE() just after the struct used in it. For clarity. Suggested-by: Corey Minyard Signed-off-by: Luis de Bethencourt Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 71b59cb6ae0c..55fe9020459f 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2582,6 +2582,7 @@ static const struct of_device_id of_ipmi_match[] = { .data = (void *)(unsigned long) SI_BT }, {}, }; +MODULE_DEVICE_TABLE(of, of_ipmi_match); static int of_ipmi_probe(struct platform_device *dev) { @@ -2668,7 +2669,6 @@ static int of_ipmi_probe(struct platform_device *dev) } return 0; } -MODULE_DEVICE_TABLE(of, of_ipmi_match); #else #define of_ipmi_match NULL static int of_ipmi_probe(struct platform_device *dev) From 36fa4a530b7798aa85789953b08d94c03fb09fa5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 25 Oct 2015 11:09:33 +0100 Subject: [PATCH 031/291] dmaengine: sh: usb-dmac: Fix crash on runtime suspend If CONFIG_PREEMPT=y: Unable to handle kernel NULL pointer dereference at virtual address 00000014 pgd = c0003000 [00000014] *pgd=80000040004003, *pmd=00000000 Internal error: Oops: 206 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 17 Comm: kworker/0:1 Tainted: G W 4.3.0-rc3-koelsch-022 71-g705498fc5e6a5da8-dirty #1789 Hardware name: Generic R8A7791 (Flattened Device Tree) Workqueue: pm pm_runtime_work task: ef578e40 ti: ef57a000 task.ti: ef57a000 PC is at usb_dmac_chan_halt+0xc/0xc0 LR is at usb_dmac_runtime_suspend+0x28/0x38 pc : [] lr : [] psr: 80000113 sp : ef57bdf8 ip : 00000008 fp : 00000003 r10: 00000008 r9 : c06ab928 r8 : ef49e810 r7 : 00000000 r6 : 000000ac r5 : ef770010 r4 : 00000000 r3 : 00000000 r2 : 8ffc2b84 r1 : 00000000 r0 : ef770010 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 30c5307d Table: 40003000 DAC: fffffffd Process kworker/0:1 (pid: 17, stack limit = 0xef57a210) Stack: (0xef57bdf8 to 0xef57c000) [... [] (usb_dmac_chan_halt) from [] (usb_dmac_runtime_suspend+0x28/0x38) [] (usb_dmac_runtime_suspend) from [] (pm_genpd_runtime_suspend+0x74/0x23c) This happens because usb_dmac_probe() calls pm_runtime_put() before usb_dmac_chan_probe(), leading to the device being suspended before the DMA channels are initialized, causing a NULL pointer dereference. Move the call to pm_runtime_put() to the end of usb_dmac_probe() to fix this. Add a check to usb_dmac_runtime_suspend() to prevent the crash from happening in the error path. Reported-by: Sergei Shtylyov Signed-off-by: Geert Uytterhoeven Tested-by: Yoshihiro Shimoda Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index ebd8a5f398b0..f305738b5adf 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -679,8 +679,11 @@ static int usb_dmac_runtime_suspend(struct device *dev) struct usb_dmac *dmac = dev_get_drvdata(dev); int i; - for (i = 0; i < dmac->n_channels; ++i) + for (i = 0; i < dmac->n_channels; ++i) { + if (!dmac->channels[i].iomem) + break; usb_dmac_chan_halt(&dmac->channels[i]); + } return 0; } @@ -803,7 +806,6 @@ static int usb_dmac_probe(struct platform_device *pdev) } ret = usb_dmac_init(dmac); - pm_runtime_put(&pdev->dev); if (ret) { dev_err(&pdev->dev, "failed to reset device\n"); @@ -851,10 +853,12 @@ static int usb_dmac_probe(struct platform_device *pdev) if (ret < 0) goto error; + pm_runtime_put(&pdev->dev); return 0; error: of_dma_controller_free(pdev->dev.of_node); + pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return ret; } From bf55555baaf80cdf2cc4176fee02545a07a8ff4a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 25 Oct 2015 11:09:34 +0100 Subject: [PATCH 032/291] dmaengine: sh: usb-dmac: Fix pm_runtime_{enable,disable}() imbalance If the call to pm_runtime_get_sync() failed, Runtime PM was left enabled. Signed-off-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index f305738b5adf..f1bcc2a163b3 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -802,7 +802,7 @@ static int usb_dmac_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); - return ret; + goto error_pm; } ret = usb_dmac_init(dmac); @@ -859,6 +859,7 @@ static int usb_dmac_probe(struct platform_device *pdev) error: of_dma_controller_free(pdev->dev.of_node); pm_runtime_put(&pdev->dev); +error_pm: pm_runtime_disable(&pdev->dev); return ret; } From 500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Nov 2015 12:28:10 +0200 Subject: [PATCH 033/291] dmaengine: of_dma: Correct return code for of_dma_request_slave_channel in case !CONFIG_OF of_dma_request_slave_channel should return either pointer for valid dma_chan or ERR_PTR() error code, NULL is not expected to be returned. Signed-off-by: Peter Ujfalusi Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 36112cdd665a..b90d8ec57c1f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np, static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, From 638bdc8ce845285e1112f4870aded1638187fc98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Nov 2015 15:00:57 +0100 Subject: [PATCH 034/291] dmaengine: edma: fix build without CONFIG_OF During the edma rework, a build error was introduced for the case that CONFIG_OF is disabled: drivers/built-in.o: In function `edma_tc_set_pm_state': :(.text+0x43bf0): undefined reference to `of_find_device_by_node' As the edma_tc_set_pm_state() function does nothing in case we are running without OF, this adds an IS_ENABLED() check that turns the function into an empty stub then and avoids the link error. Signed-off-by: Arnd Bergmann Fixes: ca304fa9bb76 ("ARM/dmaengine: edma: Public API to use private struct pointer") Acked-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 6b03e4e84e6b..c69d22c0b9ba 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1565,7 +1565,7 @@ static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable) struct platform_device *tc_pdev; int ret; - if (!tc) + if (!IS_ENABLED(CONFIG_OF) || !tc) return; tc_pdev = of_find_device_by_node(tc->node); From f5ea7ad252452f0b461e04a0d0e468f1f6ad8b3b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Nov 2015 16:38:31 +0300 Subject: [PATCH 035/291] dmaengine: edma: predecence bug in GET_NUM_QDMACH() The current code uses bits 0-2 instead of 4-6 as the comment says. Fixes: 633e42b8c546 ('dmaengine: edma: Get qDMA channel information from HW also') Signed-off-by: Dan Carpenter Acked-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index c69d22c0b9ba..0675e268d577 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -107,7 +107,7 @@ /* CCCFG register */ #define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ -#define GET_NUM_QDMACH(x) (x & 0x70 >> 4) /* bits 4-6 */ +#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */ #define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ #define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ #define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ From 29f493dafa6113d28763e9efb853e6972b41ade5 Mon Sep 17 00:00:00 2001 From: Jason Liu Date: Wed, 11 Nov 2015 17:20:49 +0800 Subject: [PATCH 036/291] dmaengine: imx-sdma: remove __init annotation on sdma_event_remap The sdma_probe function will call sdma_event_remap, but sdma_event_remap marked with the __init annotation which make the kbuild complains as the following log: WARNING: drivers/dma/built-in.o(.text+0x56fc): Section mismatch in reference from the function sdma_probe() to the function .init.text:sdma_event_remap() The function sdma_probe() references the function __init sdma_event_remap(). This is often because sdma_probe lacks a __init annotation or the annotation of sdma_event_remap is wrong. Remove the __init annotation on sdma_event_remap to kill this build warning Signed-off-by: Jason Liu Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 7058d58ba588..0f6fd42f55ca 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1462,7 +1462,7 @@ err_firmware: #define EVENT_REMAP_CELLS 3 -static int __init sdma_event_remap(struct sdma_engine *sdma) +static int sdma_event_remap(struct sdma_engine *sdma) { struct device_node *np = sdma->dev->of_node; struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); From 268914f4e7a013a4798b5544cce9d9584de99889 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Nov 2015 15:16:53 +0100 Subject: [PATCH 037/291] dmaengine: at_xdmac: use %pad format string for dma_addr_t dma_addr_t may be defined as 32 or 64 bit depending on configuration, so it cannot be printed using the normal format strings, as gcc correctly warns: drivers/dma/at_xdmac.c: In function 'at_xdmac_interleaved_queue_desc': drivers/dma/at_xdmac.c:922:51: warning: format '%x' expects argument of type 'unsigned int', but argument 5 has type 'dma_addr_t {aka long long unsigned int}' [-Wformat=] This changes the format strings to use the special "%pad" format string that prints a dma_addr_t, and changes the arguments so we pass the address by reference as required. Signed-off-by: Arnd Bergmann Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index b5e132d4bae5..7f039de143f0 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -920,8 +920,8 @@ at_xdmac_interleaved_queue_desc(struct dma_chan *chan, desc->lld.mbr_cfg = chan_cc; dev_dbg(chan2dev(chan), - "%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", - __func__, desc->lld.mbr_sa, desc->lld.mbr_da, + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ @@ -953,8 +953,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan, if ((xt->numf > 1) && (xt->frame_size > 1)) return NULL; - dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", - __func__, xt->src_start, xt->dst_start, xt->numf, + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, xt->frame_size, flags); src_addr = xt->src_start; @@ -1179,8 +1179,8 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, desc->lld.mbr_cfg = chan_cc; dev_dbg(chan2dev(chan), - "%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", - __func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc, + "%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc, desc->lld.mbr_cfg); return desc; @@ -1193,8 +1193,8 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); struct at_xdmac_desc *desc; - dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", - __func__, dest, len, value, flags); + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &dest, len, value, flags); if (unlikely(!len)) return NULL; @@ -1229,8 +1229,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, /* Prepare descriptors. */ for_each_sg(sgl, sg, sg_len, i) { - dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", - __func__, sg_dma_address(sg), sg_dma_len(sg), + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &sg_dma_address(sg), sg_dma_len(sg), value, flags); desc = at_xdmac_memset_create_desc(chan, atchan, sg_dma_address(sg), From 2c5d7407e012721f02741f1adae2b1bdf6ef6449 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Nov 2015 15:18:22 +0100 Subject: [PATCH 038/291] dmaengine: at_hdmac: use %pad format string for dma_addr_t dma_addr_t may be defined as 32 or 64 bit depending on configuration, so it cannot be printed using the normal format strings, as gcc correctly warns: drivers/dma/at_hdmac.c: In function 'atc_prep_dma_interleaved': drivers/dma/at_hdmac.c:731:28: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'dma_addr_t {aka long long unsigned int}' [-Wformat=] This changes the format strings to use the special "%pad" format string that prints a dma_addr_t, and changes the arguments so we pass the address by reference as required. Signed-off-by: Arnd Bergmann Acked-by: Nicolas Ferre Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 20 ++++++++++---------- drivers/dma/at_hdmac_regs.h | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 4e55239c7a30..53d22eb73b56 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -729,8 +729,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan, return NULL; dev_info(chan2dev(chan), - "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", - __func__, xt->src_start, xt->dst_start, xt->numf, + "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, xt->frame_size, flags); /* @@ -824,8 +824,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, u32 ctrla; u32 ctrlb; - dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", - dest, src, len, flags); + dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); if (unlikely(!len)) { dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); @@ -938,8 +938,8 @@ atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, void __iomem *vaddr; dma_addr_t paddr; - dev_vdbg(chan2dev(chan), "%s: d0x%x v0x%x l0x%zx f0x%lx\n", __func__, - dest, value, len, flags); + dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, + &dest, value, len, flags); if (unlikely(!len)) { dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); @@ -1022,8 +1022,8 @@ atc_prep_dma_memset_sg(struct dma_chan *chan, dma_addr_t dest = sg_dma_address(sg); size_t len = sg_dma_len(sg); - dev_vdbg(chan2dev(chan), "%s: d0x%08x, l0x%zx\n", - __func__, dest, len); + dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n", + __func__, &dest, len); if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { dev_err(chan2dev(chan), "%s: buffer is not aligned\n", @@ -1439,9 +1439,9 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, unsigned int periods = buf_len / period_len; unsigned int i; - dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n", direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", - buf_addr, + &buf_addr, periods, buf_len, period_len); if (unlikely(!atslave || !buf_len || !period_len)) { diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index d1cfc8c876f9..7f58f06157f6 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -385,9 +385,9 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { dev_crit(chan2dev(&atchan->chan_common), - " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", - lli->saddr, lli->daddr, - lli->ctrla, lli->ctrlb, lli->dscr); + " desc: s%pad d%pad ctrl0x%x:0x%x l0x%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); } From 1451ad03fac3e86948728e0b11f63dee73d3106c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Nov 2015 21:54:26 +1100 Subject: [PATCH 039/291] powerpc: Wire up sys_mlock2() The selftest passes on 64-bit LE and 32-bit BE. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index c9e26cb264f4..f2b0b1b0c72a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -382,3 +382,4 @@ COMPAT_SYS(shmat) SYSCALL(shmdt) SYSCALL(shmget) COMPAT_SYS(shmctl) +SYSCALL(mlock2) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 6d8f8023ac27..4b6b8ace18e0 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 378 +#define __NR_syscalls 379 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 81579e93c659..1effea5193d6 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -400,5 +400,6 @@ #define __NR_shmdt 375 #define __NR_shmget 376 #define __NR_shmctl 377 +#define __NR_mlock2 378 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From bd1c6ff74ce0bbd8cda6eb7763fa0e2625dfcc8b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 4 Nov 2015 13:23:52 +0000 Subject: [PATCH 040/291] arm64/dma-mapping: Fix sizes in __iommu_{alloc,free}_attrs The iommu-dma layer does its own size-alignment for coherent DMA allocations based on IOMMU page sizes, but we still need to consider CPU page sizes for the cases where a non-cacheable CPU mapping is created. Whilst everything on the alloc/map path seems to implicitly align things enough to make it work, some functions used by the corresponding unmap/free path do not, which leads to problems freeing odd-sized allocations. Either way it's something we really should be handling explicitly, so do that to make both paths suitably robust. Reported-by: Yong Wu Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 131a199114b4..97fd714035f9 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -552,10 +552,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, { bool coherent = is_device_dma_coherent(dev); int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); + size_t iosize = size; void *addr; if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) return NULL; + + size = PAGE_ALIGN(size); + /* * Some drivers rely on this, and we probably don't want the * possibility of stale kernel data being read by devices anyway. @@ -566,7 +570,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, struct page **pages; pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, + pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle, flush_page); if (!pages) return NULL; @@ -574,7 +578,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, __builtin_return_address(0)); if (!addr) - iommu_dma_free(dev, pages, size, handle); + iommu_dma_free(dev, pages, iosize, handle); } else { struct page *page; /* @@ -591,7 +595,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, if (!addr) return NULL; - *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); if (iommu_dma_mapping_error(dev, *handle)) { if (coherent) __free_pages(page, get_order(size)); @@ -606,6 +610,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) { + size_t iosize = size; + + size = PAGE_ALIGN(size); /* * @cpu_addr will be one of 3 things depending on how it was allocated: * - A remapped array of pages from iommu_dma_alloc(), for all @@ -617,17 +624,17 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, * Hence how dodgy the below logic looks... */ if (__in_atomic_pool(cpu_addr, size)) { - iommu_dma_unmap_page(dev, handle, size, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); __free_from_pool(cpu_addr, size); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); if (WARN_ON(!area || !area->pages)) return; - iommu_dma_free(dev, area->pages, size, &handle); + iommu_dma_free(dev, area->pages, iosize, &handle); dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else { - iommu_dma_unmap_page(dev, handle, size, 0, NULL); + iommu_dma_unmap_page(dev, handle, iosize, 0, NULL); __free_pages(virt_to_page(cpu_addr), get_order(size)); } } From 95486e4979e56e7da2fbb4fd32eb54d672b1e074 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 16 Nov 2015 12:07:10 +0100 Subject: [PATCH 041/291] MIPS: Fix flood of warnings about comparsion being always true. ./arch/mips/include/asm/page.h:204:13: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] The default value of ARCH_PFN_OFFSET is 0 thus triggering this warning for all platforms using the default value. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/page.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index ad1fccdb8d13..2046c0230224 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -200,8 +200,9 @@ static inline int pfn_valid(unsigned long pfn) { /* avoid include hell */ extern unsigned long max_mapnr; + unsigned long pfn_offset = ARCH_PFN_OFFSET; - return pfn >= ARCH_PFN_OFFSET && pfn < max_mapnr; + return pfn >= pfn_offset && pfn < max_mapnr; } #elif defined(CONFIG_SPARSEMEM) From c7f42c63901b964833eb23a9bda873b799e7f308 Mon Sep 17 00:00:00 2001 From: Jean-Yves Faye Date: Tue, 29 Sep 2015 11:39:19 +0200 Subject: [PATCH 042/291] ipmi watchdog : add panic_wdt_timeout parameter In order to allow panic actions to be processed, the ipmi watchdog driver sets a new timeout value on panic. The 255s timeout was designed to allow kdump and others actions on panic, as in http://lkml.iu.edu/hypermail/linux/kernel/0711.3/0258.html This is counter-intuitive for a end-user who sets watchdog timeout value to something like 30s and who expects BMC to reset the system within 30s of a panic. This commit allows user to configure the timeout on panic. Signed-off-by: Jean-Yves Faye Signed-off-by: Corey Minyard --- Documentation/IPMI.txt | 7 +++++-- drivers/char/ipmi/ipmi_watchdog.c | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 31d1d658827f..c0d8788e75d3 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -587,7 +587,7 @@ used to control it: modprobe ipmi_watchdog timeout= pretimeout= action= preaction= preop= start_now=x - nowayout=x ifnum_to_use=n + nowayout=x ifnum_to_use=n panic_wdt_timeout= ifnum_to_use specifies which interface the watchdog timer should use. The default is -1, which means to pick the first one registered. @@ -597,7 +597,9 @@ is the amount of seconds before the reset that the pre-timeout panic will occur (if pretimeout is zero, then pretimeout will not be enabled). Note that the pretimeout is the time before the final timeout. So if the timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout -will occur in 40 second (10 seconds before the timeout). +will occur in 40 second (10 seconds before the timeout). The panic_wdt_timeout +is the value of timeout which is set on kernel panic, in order to let actions +such as kdump to occur during panic. The action may be "reset", "power_cycle", or "power_off", and specifies what to do when the timer times out, and defaults to @@ -634,6 +636,7 @@ for configuring the watchdog: ipmi_watchdog.preop= ipmi_watchdog.start_now=x ipmi_watchdog.nowayout=x + ipmi_watchdog.panic_wdt_timeout= The options are the same as the module parameter options. diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 0ac3bd1a5497..096f0cef4da1 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -153,6 +153,9 @@ static int timeout = 10; /* The pre-timeout is disabled by default. */ static int pretimeout; +/* Default timeout to set on panic */ +static int panic_wdt_timeout = 255; + /* Default action is to reset the board on a timeout. */ static unsigned char action_val = WDOG_TIMEOUT_RESET; @@ -293,6 +296,9 @@ MODULE_PARM_DESC(timeout, "Timeout value in seconds."); module_param(pretimeout, timeout, 0644); MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds."); +module_param(panic_wdt_timeout, timeout, 0644); +MODULE_PARM_DESC(timeout, "Timeout value on kernel panic in seconds."); + module_param_cb(action, ¶m_ops_str, action_op, 0644); MODULE_PARM_DESC(action, "Timeout action. One of: " "reset, none, power_cycle, power_off."); @@ -1189,7 +1195,7 @@ static int wdog_panic_handler(struct notifier_block *this, /* Make sure we do this only once. */ panic_event_handled = 1; - timeout = 255; + timeout = panic_wdt_timeout; pretimeout = 0; panic_halt_ipmi_set_timeout(); } From f44e26593aea9b920e892fc490ffd2a1a0b9123f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 13 Nov 2015 19:16:13 +0200 Subject: [PATCH 043/291] drm/i915: Fix crtc_y assignment in intel_find_initial_plane_obj() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's set crtc_y to 0 instead of setting src_y twice. Multiple assignments in one statement is a good way to hide bugs. Please don't do that. Cc: Maarten Lankhorst Fixes: be5651f2d581 ("drm/i915: Update missing properties in find_initial_plane_obj") Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1447434973-12369-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f62ffc04c21d..b0f23692346e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2646,11 +2646,13 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: - plane_state->src_x = plane_state->src_y = 0; + plane_state->src_x = 0; + plane_state->src_y = 0; plane_state->src_w = fb->width << 16; plane_state->src_h = fb->height << 16; - plane_state->crtc_x = plane_state->src_y = 0; + plane_state->crtc_x = 0; + plane_state->crtc_y = 0; plane_state->crtc_w = fb->width; plane_state->crtc_h = fb->height; From 1b9448b071caa7d10bb2569fabe3020a2c25ae59 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 5 Nov 2015 11:49:59 +0200 Subject: [PATCH 044/291] drm/i915: quirk backlight present on Macbook 4, 1 Unsurprisingly macbooks have backlights, just the VBT doesn't seem to know it in this case. Reported-and-tested-by: Daniel Nicoletti Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88325 Fixes: c675949ec58c ("drm/i915: do not setup backlight if not available according to VBT") Cc: stable@vger.kernel.org # v3.15+ Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1446716999-1796-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b0f23692346e..bcf227356f78 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14707,6 +14707,9 @@ static struct intel_quirk intel_quirks[] = { /* Apple Macbook 2,1 (Core 2 T7400) */ { 0x27a2, 0x8086, 0x7270, quirk_backlight_present }, + /* Apple Macbook 4,1 */ + { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present }, + /* Toshiba CB35 Chromebook (Celeron 2955U) */ { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, From 9d5b5ed796d7afd7e8d2ac4b4fb77c6a49463f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Tue, 3 Nov 2015 11:25:28 +0100 Subject: [PATCH 045/291] USB: qcserial: Add support for Quectel EC20 Mini PCIe module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems like this device has same vendor and product IDs as G2K devices, but it has different number of interfaces(4 vs 5) and also different interface layout which makes it currently unusable: usbcore: registered new interface driver qcserial usbserial: USB Serial support registered for Qualcomm USB modem usb 2-1.2: unknown number of interfaces: 5 lsusb output: Bus 002 Device 003: ID 05c6:9215 Qualcomm, Inc. Acer Gobi 2000 Wireless Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x05c6 Qualcomm, Inc. idProduct 0x9215 Acer Gobi 2000 Wireless Modem bcdDevice 2.32 iManufacturer 1 Quectel iProduct 2 Quectel LTE Module iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 209 bNumInterfaces 5 bConfigurationValue 1 iConfiguration 0 bmAttributes 0xa0 (Bus Powered) Remote Wakeup MaxPower 500mA Signed-off-by: Petr Štetiar Cc: stable [johan: rename define and add comment ] Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 5022fcfa0260..0e46af0d0cb4 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,6 +22,8 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" +#define QUECTEL_EC20_PID 0x9215 + /* standard device layouts supported by this driver */ enum qcserial_layouts { QCSERIAL_G2K = 0, /* Gobi 2000 */ @@ -171,6 +173,38 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); +static int handle_quectel_ec20(struct device *dev, int ifnum) +{ + int altsetting = 0; + + /* + * Quectel EC20 Mini PCIe LTE module layout: + * 0: DM/DIAG (use libqcdm from ModemManager for communication) + * 1: NMEA + * 2: AT-capable modem port + * 3: Modem interface + * 4: NDIS + */ + switch (ifnum) { + case 0: + dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n"); + break; + case 1: + dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n"); + break; + case 2: + case 3: + dev_dbg(dev, "Quectel EC20 Modem port found\n"); + break; + case 4: + /* Don't claim the QMI/net interface */ + altsetting = -1; + break; + } + + return altsetting; +} + static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) { struct usb_host_interface *intf = serial->interface->cur_altsetting; @@ -240,6 +274,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) altsetting = -1; break; case QCSERIAL_G2K: + /* handle non-standard layouts */ + if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) { + altsetting = handle_quectel_ec20(dev, ifnum); + goto done; + } + /* * Gobi 2K+ USB layout: * 0: QMI/net From e07af133c3e2716db25e3e1e1d9f10c2088e9c1a Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 11 Nov 2015 19:51:40 +0100 Subject: [PATCH 046/291] USB: serial: option: add support for Novatel MiFi USB620L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also known as Verizon U620L. The device is modeswitched from 1410:9020 to 1410:9022 by selecting the 4th USB configuration: $ sudo usb_modeswitch –v 0x1410 –p 0x9020 –u 4 This configuration provides a ECM interface as well as TTYs ('Enterprise Mode' according to the U620 Linux integration guide). Signed-off-by: Aleksander Morgado Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 685fef71d3d1..2ab2a33e00c4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -161,6 +161,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 #define NOVATELWIRELESS_PRODUCT_E371 0x9011 +#define NOVATELWIRELESS_PRODUCT_U620L 0x9022 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -1052,6 +1053,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, From 1bcb49e663f88bccee35b8688e6a3da2bea31fd4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Nov 2015 16:49:30 +0000 Subject: [PATCH 047/291] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID The Honeywell HGI80 is a wireless interface to the evohome connected thermostat. It uses a TI 3410 USB-serial port. Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ti_usb_3410_5052.c | 2 ++ drivers/usb/serial/ti_usb_3410_5052.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e9da41d9fe7f..2694df2f4559 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; @@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h index 4a2423e84d55..98f35c656c02 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ b/drivers/usb/serial/ti_usb_3410_5052.h @@ -56,6 +56,10 @@ #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 From 500a3d2eb3883b71350036e15aad286cc6e5df21 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 13 Nov 2015 19:29:41 +0200 Subject: [PATCH 048/291] drm/i915: Fix GT frequency rounding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we set and later readback a frequency value through sysfs interface, igt/pm_rpm assumes that we get same value back if it matches hw granularity. On bxt we have found out that this is not always the case. Currently frequency - hw ratio - frequency conversions round down, with few exceptions on platforms that have more specific conversions. On bxt the supported range can be for example from 100Mhz to 650Mhz. Midpoint is then calculated by test to be 375 which pm_rps uses to find a closest hw supported frequency. That is 366 (ratio 22), which it then writes back. But as the rounding down kicks in, driver actually sets 350 instead of 366, as 366 is 2/3 below 22 * 50/3. Fix this by rounding to closest instead of rounding down in freq-ratio-freq conversions. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92768 Testcase: igt/pm_rps/basic-api Tested-by: Bob Paauwe Cc: Bob Paauwe Signed-off-by: Imre Deak Signed-off-by: Mika Kuoppala Reviewed-by: Ville Syrjälä Reviewed-by: Bob Paauwe Link: http://patchwork.freedesktop.org/patch/msgid/1447435781-23416-1-git-send-email-mika.kuoppala@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d52a15df6917..7b47da805685 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7255,7 +7255,8 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) { if (IS_GEN9(dev_priv->dev)) - return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER; + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); else if (IS_CHERRYVIEW(dev_priv->dev)) return chv_gpu_freq(dev_priv, val); else if (IS_VALLEYVIEW(dev_priv->dev)) @@ -7267,13 +7268,14 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) { if (IS_GEN9(dev_priv->dev)) - return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER; + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); else if (IS_CHERRYVIEW(dev_priv->dev)) return chv_freq_opcode(dev_priv, val); else if (IS_VALLEYVIEW(dev_priv->dev)) return byt_freq_opcode(dev_priv, val); else - return val / GT_FREQUENCY_MULTIPLIER; + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); } struct request_boost { From fd0fe6acf1dd88aabfbf383f7e4c16315387a7b7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 4 Nov 2015 21:25:32 +0200 Subject: [PATCH 049/291] drm/i915: get runtime PM reference around GEM set_caching IOCTL After Damien's D3 fix I started to get runtime suspend residency for the first time and that revealed a breakage on the set_caching IOCTL path that accesses the HW but doesn't take an RPM ref. Fix this up. Signed-off-by: Imre Deak Reviewed-by: Paulo Zanoni Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1446665132-22491-1-git-send-email-imre.deak@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5cf4a1998273..91bb1fc27420 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3809,6 +3809,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; enum i915_cache_level level; @@ -3837,9 +3838,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + intel_runtime_pm_get(dev_priv); + ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + goto rpm_put; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { @@ -3852,6 +3855,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); +rpm_put: + intel_runtime_pm_put(dev_priv); + return ret; } From 172c238612ebf81cabccc86b788c9209af591f61 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 6 Nov 2015 10:53:01 -0500 Subject: [PATCH 050/291] dm thin: restore requested 'error_if_no_space' setting on OODS to WRITE transition A thin-pool that is in out-of-data-space (OODS) mode may transition back to write mode -- without the admin adding more space to the thin-pool -- if/when blocks are released (either by deleting thin devices or discarding provisioned blocks). But as part of the thin-pool's earlier transition to out-of-data-space mode the thin-pool may have set the 'error_if_no_space' flag to true if the no_space_timeout expires without more space having been made available. That implementation detail, of changing the pool's error_if_no_space setting, needs to be reset back to the default that the user specified when the thin-pool's table was loaded. Otherwise we'll drop the user requested behaviour on the floor when this out-of-data-space to write mode transition occurs. Reported-by: Vivek Goyal Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Fixes: 2c43fd26e4 ("dm thin: fix missing out-of-data-space to write mode transition if blocks are released") Cc: stable@vger.kernel.org --- drivers/md/dm-thin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 3897b90bd462..9f0b94fad361 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2432,6 +2432,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) case PM_WRITE: if (old_mode != new_mode) notify_of_pool_mode_change(pool, "write"); + pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space; dm_pool_metadata_read_write(pool->pmd); pool->process_bio = process_bio; pool->process_discard = process_discard_bio; From 96ea47c0ec8c012509116bee8c57414281428fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 5 Nov 2015 17:25:26 +0900 Subject: [PATCH 051/291] drm/radeon: Disable uncacheable CPU mappings of GTT with RV6xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They reportedly cause random GPU hangs. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91268 Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_object.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index d3024883b844..a35f5af77c6a 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -221,6 +221,12 @@ int radeon_bo_create(struct radeon_device *rdev, if (!(rdev->flags & RADEON_IS_PCIE)) bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); + /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx + * See https://bugs.freedesktop.org/show_bug.cgi?id=91268 + */ + if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635) + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); + #ifdef CONFIG_X86_32 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 From a28bbd5824d4a2af98de45b300ab8d8fb39739fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 5 Nov 2015 17:25:27 +0900 Subject: [PATCH 052/291] drm/radeon: Always disable RADEON_GEM_GTT_UC along with RADEON_GEM_GTT_WC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write-combining is a CPU feature. From the GPU POV, these both simply mean no GPU<->CPU cache coherency. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a35f5af77c6a..dda2ecfbf056 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -231,7 +231,7 @@ int radeon_bo_create(struct radeon_device *rdev, /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 */ - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) /* Don't try to enable write-combining when it can't work, or things * may be slow @@ -243,7 +243,7 @@ int radeon_bo_create(struct radeon_device *rdev, DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); - bo->flags &= ~RADEON_GEM_GTT_WC; + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #endif radeon_ttm_placement_from_domain(bo, domain); From 938204985c1c46c28a88cb558d9e38610044b67a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 5 Nov 2015 17:25:28 +0900 Subject: [PATCH 053/291] drm/radeon: Only prompt for enabling PAT when we'd allow write-combining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No use bothering users about this for whom we disable write-combining for other reasons anyway. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index dda2ecfbf056..84d45633d28c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -241,8 +241,9 @@ int radeon_bo_create(struct radeon_device *rdev, #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ thanks to write-combining - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); + if (bo->flags & RADEON_GEM_GTT_WC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); #endif From fa6760482bed1ed752568fe09135c078e9768595 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Fri, 30 Oct 2015 18:29:46 +0800 Subject: [PATCH 054/291] drm/amdgpu: update Fiji's mmPA_SC_RASTER_CONFIG value Change-Id: I6d138306a878450e5bf8a77a2f1aacc380a39fe5 Signed-off-by: Flora Cui Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6776cf756d40..4410e5735154 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2971,10 +2971,13 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); switch (adev->asic_type) { case CHIP_TONGA: - case CHIP_FIJI: amdgpu_ring_write(ring, 0x16000012); amdgpu_ring_write(ring, 0x0000002A); break; + case CHIP_FIJI: + amdgpu_ring_write(ring, 0x3a00161a); + amdgpu_ring_write(ring, 0x0000002e); + break; case CHIP_TOPAZ: case CHIP_CARRIZO: amdgpu_ring_write(ring, 0x00000002); From 935c186aaecc25258495351adaba34f7c507d298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 3 Nov 2015 21:23:08 +0100 Subject: [PATCH 055/291] drm/amdgpu: remove fence trace points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly unused and replaced by the common trace points. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 43 ----------------------- 2 files changed, 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 257d72205bb5..b92c1937543a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -132,7 +132,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, (*fence)->seq, AMDGPU_FENCE_FLAG_INT); - trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 76ecbaf72a2e..26e2d50a6f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -196,49 +196,6 @@ TRACE_EVENT(amdgpu_bo_list_set, TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) ); -DECLARE_EVENT_CLASS(amdgpu_fence_request, - - TP_PROTO(struct drm_device *dev, int ring, u32 seqno), - - TP_ARGS(dev, ring, seqno), - - TP_STRUCT__entry( - __field(u32, dev) - __field(int, ring) - __field(u32, seqno) - ), - - TP_fast_assign( - __entry->dev = dev->primary->index; - __entry->ring = ring; - __entry->seqno = seqno; - ), - - TP_printk("dev=%u, ring=%d, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) -); - -DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit, - - TP_PROTO(struct drm_device *dev, int ring, u32 seqno), - - TP_ARGS(dev, ring, seqno) -); - -DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin, - - TP_PROTO(struct drm_device *dev, int ring, u32 seqno), - - TP_ARGS(dev, ring, seqno) -); - -DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end, - - TP_PROTO(struct drm_device *dev, int ring, u32 seqno), - - TP_ARGS(dev, ring, seqno) -); - DECLARE_EVENT_CLASS(amdgpu_semaphore_request, TP_PROTO(int ring, struct amdgpu_semaphore *sem), From c2776afe740db5598c4c457dcacb94d4427b13f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 3 Nov 2015 13:27:39 +0100 Subject: [PATCH 056/291] drm/amdgpu: use a timer for fence fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Less overhead than a work item and also adds proper cleanup handling. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 80 ++++++++++------------- 2 files changed, 36 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 615ce6d464fb..3630cb545d0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -389,7 +389,6 @@ struct amdgpu_clock { * Fences. */ struct amdgpu_fence_driver { - struct amdgpu_ring *ring; uint64_t gpu_addr; volatile uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ @@ -398,7 +397,7 @@ struct amdgpu_fence_driver { bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; - struct delayed_work lockup_work; + struct timer_list fallback_timer; wait_queue_head_t fence_queue; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index b92c1937543a..257fce356319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -84,24 +84,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) return seq; } -/** - * amdgpu_fence_schedule_check - schedule lockup check - * - * @ring: pointer to struct amdgpu_ring - * - * Queues a delayed work item to check for lockups. - */ -static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring) -{ - /* - * Do not reset the timer here with mod_delayed_work, - * this can livelock in an interaction with TTM delayed destroy. - */ - queue_delayed_work(system_power_efficient_wq, - &ring->fence_drv.lockup_work, - AMDGPU_FENCE_JIFFIES_TIMEOUT); -} - /** * amdgpu_fence_emit - emit a fence on the requested ring * @@ -135,6 +117,19 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, return 0; } +/** + * amdgpu_fence_schedule_fallback - schedule fallback check + * + * @ring: pointer to struct amdgpu_ring + * + * Start a timer as fallback to our interrupts. + */ +static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) +{ + mod_timer(&ring->fence_drv.fallback_timer, + jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT); +} + /** * amdgpu_fence_activity - check for fence activity * @@ -201,32 +196,11 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring) } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); if (seq < last_emitted) - amdgpu_fence_schedule_check(ring); + amdgpu_fence_schedule_fallback(ring); return wake; } -/** - * amdgpu_fence_check_lockup - check for hardware lockup - * - * @work: delayed work item - * - * Checks for fence activity and if there is none probe - * the hardware if a lockup occured. - */ -static void amdgpu_fence_check_lockup(struct work_struct *work) -{ - struct amdgpu_fence_driver *fence_drv; - struct amdgpu_ring *ring; - - fence_drv = container_of(work, struct amdgpu_fence_driver, - lockup_work.work); - ring = fence_drv->ring; - - if (amdgpu_fence_activity(ring)) - wake_up_all(&ring->fence_drv.fence_queue); -} - /** * amdgpu_fence_process - process a fence * @@ -242,6 +216,20 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) wake_up_all(&ring->fence_drv.fence_queue); } +/** + * amdgpu_fence_fallback - fallback for hardware interrupts + * + * @work: delayed work item + * + * Checks for fence activity. + */ +static void amdgpu_fence_fallback(unsigned long arg) +{ + struct amdgpu_ring *ring = (void *)arg; + + amdgpu_fence_process(ring); +} + /** * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled * @@ -289,7 +277,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) if (atomic64_read(&ring->fence_drv.last_seq) >= seq) return 0; - amdgpu_fence_schedule_check(ring); + amdgpu_fence_schedule_fallback(ring); wait_event(ring->fence_drv.fence_queue, ( (signaled = amdgpu_fence_seq_signaled(ring, seq)))); @@ -490,9 +478,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) atomic64_set(&ring->fence_drv.last_seq, 0); ring->fence_drv.initialized = false; - INIT_DELAYED_WORK(&ring->fence_drv.lockup_work, - amdgpu_fence_check_lockup); - ring->fence_drv.ring = ring; + setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, + (unsigned long)ring); init_waitqueue_head(&ring->fence_drv.fence_queue); @@ -556,6 +543,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) mutex_lock(&adev->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; + if (!ring || !ring->fence_drv.initialized) continue; r = amdgpu_fence_wait_empty(ring); @@ -567,6 +555,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); amd_sched_fini(&ring->sched); + del_timer_sync(&ring->fence_drv.fallback_timer); ring->fence_drv.initialized = false; } mutex_unlock(&adev->ring_lock); @@ -750,7 +739,8 @@ static bool amdgpu_fence_enable_signaling(struct fence *f) fence->fence_wake.func = amdgpu_fence_check_signaled; __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake); fence_get(f); - amdgpu_fence_schedule_check(ring); + if (!timer_pending(&ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(ring); FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); return true; } From b56c22853eabde22a93d77bbd5250c1655405d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 29 Oct 2015 17:01:19 +0100 Subject: [PATCH 057/291] drm/amdgpu: use fence_is_later() for vm_flush as well v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: remove superfluous check Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 633a32a48560..8c80381b1ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -207,24 +207,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; struct fence *flushed_updates = vm_id->flushed_updates; - bool is_earlier = false; + bool is_later; - if (flushed_updates && updates) { - BUG_ON(flushed_updates->context != updates->context); - is_earlier = (updates->seqno - flushed_updates->seqno <= - INT_MAX) ? true : false; - } - - if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || - is_earlier) { + if (!flushed_updates) + is_later = true; + else if (!updates) + is_later = false; + else + is_later = fence_is_later(updates, flushed_updates); + if (pd_addr != vm_id->pd_gpu_addr || is_later) { trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); - if (is_earlier) { + if (is_later) { vm_id->flushed_updates = fence_get(updates); fence_put(flushed_updates); } - if (!flushed_updates) - vm_id->flushed_updates = fence_get(updates); vm_id->pd_gpu_addr = pd_addr; amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); } From 16ae42feb0ce6cc80473d35dd82a1cd07e2e7178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 3 Nov 2015 14:53:28 +0100 Subject: [PATCH 058/291] drm/amdgpu: use common fence for amdgpu_vm_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just cleanup the function parameters. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3630cb545d0d..b801b6702812 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2332,7 +2332,7 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct fence *updates); void amdgpu_vm_fence(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_fence *fence); + struct fence *fence); uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index e65987743871..3902e7a9f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, } if (ib->vm) - amdgpu_vm_fence(adev, ib->vm, ib->fence); + amdgpu_vm_fence(adev, ib->vm, &ib->fence->base); amdgpu_ring_unlock_commit(ring); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8c80381b1ea9..a9fcc995d27e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -241,16 +241,16 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, */ void amdgpu_vm_fence(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_fence *fence) + struct fence *fence) { - unsigned ridx = fence->ring->idx; - unsigned vm_id = vm->ids[ridx].id; + struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence); + unsigned vm_id = vm->ids[ring->idx].id; fence_put(adev->vm_manager.active[vm_id]); - adev->vm_manager.active[vm_id] = fence_get(&fence->base); + adev->vm_manager.active[vm_id] = fence_get(fence); - fence_put(vm->ids[ridx].last_id_use); - vm->ids[ridx].last_id_use = fence_get(&fence->base); + fence_put(vm->ids[ring->idx].last_id_use); + vm->ids[ring->idx].last_id_use = fence_get(fence); } /** From 451f698bcac84c49dd4e25dd8e89dbd66796b4cd Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 5 Nov 2015 15:24:46 +0800 Subject: [PATCH 059/291] drm/amdgpu: update fiji_mgcg_cgcg_init table Change-Id: If44b8057741c78208f1976f60f31b535c944d0bd Signed-off-by: Flora Cui Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4410e5735154..3de6a8894e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -268,7 +268,6 @@ static const u32 fiji_mgcg_cgcg_init[] = mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, - mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100, mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, @@ -296,10 +295,6 @@ static const u32 fiji_mgcg_cgcg_init[] = mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, - mmPCIE_INDEX, 0xffffffff, 0x0140001c, - mmPCIE_DATA, 0x000f0000, 0x00000000, - mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100, - mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104, mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; From b49c84a5765cf68b8e2fdb1dc4eded9c208e6d58 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 5 Nov 2015 11:28:28 +0800 Subject: [PATCH 060/291] drm/amdgpu: add kmem cache for amdgpu fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I5ad8dd156ccf27a6f18004aa0a215a0925b6e67b Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 257fce356319..3671f9f220bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,6 +47,9 @@ * that the the relevant GPU caches have been flushed. */ +static struct kmem_cache *amdgpu_fence_slab; +static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); + /** * amdgpu_fence_write - write a fence value * @@ -100,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, struct amdgpu_device *adev = ring->adev; /* we are protected by the ring emission mutex */ - *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); + *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; } @@ -522,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) */ int amdgpu_fence_driver_init(struct amdgpu_device *adev) { + if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) { + amdgpu_fence_slab = kmem_cache_create( + "amdgpu_fence", sizeof(struct amdgpu_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_fence_slab) + return -ENOMEM; + } if (amdgpu_debugfs_fence_init(adev)) dev_err(adev->dev, "fence debugfs file creation failed\n"); @@ -540,6 +550,8 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) { int i, r; + if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) + kmem_cache_destroy(amdgpu_fence_slab); mutex_lock(&adev->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -745,13 +757,19 @@ static bool amdgpu_fence_enable_signaling(struct fence *f) return true; } +static void amdgpu_fence_release(struct fence *f) +{ + struct amdgpu_fence *fence = to_amdgpu_fence(f); + kmem_cache_free(amdgpu_fence_slab, fence); +} + const struct fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, .signaled = amdgpu_fence_is_signaled, .wait = fence_default_wait, - .release = NULL, + .release = amdgpu_fence_release, }; /* From f5617f9dde5ae2466560f7cb008c741e2b88adab Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 5 Nov 2015 11:41:50 +0800 Subject: [PATCH 061/291] drm/amd: add kmem cache for sched fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I45bb8ff10ef05dc3b15e31a77fbcf31117705f11 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 12 ++++++++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +++ drivers/gpu/drm/amd/scheduler/sched_fence.c | 10 ++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 89619a5a4289..fe5b3c415f18 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -34,6 +34,9 @@ static struct amd_sched_job * amd_sched_entity_pop_job(struct amd_sched_entity *entity); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); +struct kmem_cache *sched_fence_slab; +atomic_t sched_fence_slab_ref = ATOMIC_INIT(0); + /* Initialize a given run queue struct */ static void amd_sched_rq_init(struct amd_sched_rq *rq) { @@ -450,6 +453,13 @@ int amd_sched_init(struct amd_gpu_scheduler *sched, init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->job_scheduled); atomic_set(&sched->hw_rq_count, 0); + if (atomic_inc_return(&sched_fence_slab_ref) == 1) { + sched_fence_slab = kmem_cache_create( + "amd_sched_fence", sizeof(struct amd_sched_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!sched_fence_slab) + return -ENOMEM; + } /* Each scheduler will run on a seperate kernel thread */ sched->thread = kthread_run(amd_sched_main, sched, sched->name); @@ -470,4 +480,6 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched) { if (sched->thread) kthread_stop(sched->thread); + if (atomic_dec_and_test(&sched_fence_slab_ref)) + kmem_cache_destroy(sched_fence_slab); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 929e9aced041..4d05ca6fb057 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,6 +30,9 @@ struct amd_gpu_scheduler; struct amd_sched_rq; +extern struct kmem_cache *sched_fence_slab; +extern atomic_t sched_fence_slab_ref; + /** * A scheduler entity is a wrapper around a job queue or a group * of other entities. Entities take turns emitting jobs from their diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index d802638094f4..8d2130b9ff05 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -32,7 +32,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity struct amd_sched_fence *fence = NULL; unsigned seq; - fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); + fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL); if (fence == NULL) return NULL; fence->owner = owner; @@ -71,11 +71,17 @@ static bool amd_sched_fence_enable_signaling(struct fence *f) return true; } +static void amd_sched_fence_release(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + kmem_cache_free(sched_fence_slab, fence); +} + const struct fence_ops amd_sched_fence_ops = { .get_driver_name = amd_sched_fence_get_driver_name, .get_timeline_name = amd_sched_fence_get_timeline_name, .enable_signaling = amd_sched_fence_enable_signaling, .signaled = NULL, .wait = fence_default_wait, - .release = NULL, + .release = amd_sched_fence_release, }; From e4a58a28b50f30e72292b6659d94410cbf7355ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Nov 2015 17:00:25 +0100 Subject: [PATCH 062/291] drm/amdgpu: fix leaking the IBs on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing a memory leak when the scheduler is enabled. Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dfc4d02c7a38..ecc82dfe83f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -499,16 +499,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - if (!amdgpu_enable_scheduler) - { - if (parser->ibs) - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); - kfree(parser->ibs); - if (parser->uf.bo) - drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); - } - + if (parser->ibs) + for (i = 0; i < parser->num_ibs; i++) + amdgpu_ib_free(parser->adev, &parser->ibs[i]); + kfree(parser->ibs); + if (parser->uf.bo) + drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); kfree(parser); } @@ -888,11 +884,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job->base.owner = parser->filp; mutex_init(&job->job_lock); if (job->ibs[job->num_ibs - 1].user) { - memcpy(&job->uf, &parser->uf, - sizeof(struct amdgpu_user_fence)); + job->uf = parser->uf; job->ibs[job->num_ibs - 1].user = &job->uf; + parser->uf.bo = NULL; } + parser->ibs = NULL; + parser->num_ibs = 0; + job->free_job = amdgpu_cs_free_job; mutex_lock(&job->job_lock); r = amd_sched_entity_push_job(&job->base); @@ -905,7 +904,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) cs->out.handle = amdgpu_ctx_add_fence(parser->ctx, ring, &job->base.s_fence->base); - parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle; + job->ibs[job->num_ibs - 1].sequence = cs->out.handle; list_sort(NULL, &parser->validated, cmp_size_smaller_first); ttm_eu_fence_buffer_objects(&parser->ticket, From 7e52a81c2f0326a85d3ebc005829bcd604731c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 4 Nov 2015 15:44:39 +0100 Subject: [PATCH 063/291] drm/amdgpu: cleanup amdgpu_cs_parser handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need any more to allocate that structure dynamically, just put it on the stack. This is a start to cleanup some of the scheduler fallouts. Signed-off-by: Christian König Reviewed-by: Junwei Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 -- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 88 ++++++++++---------------- 2 files changed, 33 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b801b6702812..7b02e3455172 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2256,11 +2256,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_card_posted(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); bool amdgpu_boot_test_post_card(struct amdgpu_device *adev); -struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, - struct drm_file *filp, - struct amdgpu_ctx *ctx, - struct amdgpu_ib *ibs, - uint32_t num_ibs); int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ecc82dfe83f8..bf32096b8eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, return 0; } -struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, - struct drm_file *filp, - struct amdgpu_ctx *ctx, - struct amdgpu_ib *ibs, - uint32_t num_ibs) -{ - struct amdgpu_cs_parser *parser; - int i; - - parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL); - if (!parser) - return NULL; - - parser->adev = adev; - parser->filp = filp; - parser->ctx = ctx; - parser->ibs = ibs; - parser->num_ibs = num_ibs; - for (i = 0; i < num_ibs; i++) - ibs[i].ctx = ctx; - - return parser; -} - int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { union drm_amdgpu_cs *cs = data; @@ -490,6 +466,7 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) { unsigned i; + if (parser->ctx) amdgpu_ctx_put(parser->ctx); if (parser->bo_list) @@ -505,7 +482,6 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) kfree(parser->ibs); if (parser->uf.bo) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); - kfree(parser); } /** @@ -824,36 +800,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_cs *cs = data; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_cs_parser *parser; + struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; int i, r; if (!adev->accel_working) return -EBUSY; - parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0); - if (!parser) - return -ENOMEM; - r = amdgpu_cs_parser_init(parser, data); + parser.adev = adev; + parser.filp = filp; + + r = amdgpu_cs_parser_init(&parser, data); if (r) { DRM_ERROR("Failed to initialize parser !\n"); - amdgpu_cs_parser_fini(parser, r, false); + amdgpu_cs_parser_fini(&parser, r, false); r = amdgpu_cs_handle_lockup(adev, r); return r; } mutex_lock(&vm->mutex); - r = amdgpu_cs_parser_relocs(parser); + r = amdgpu_cs_parser_relocs(&parser); if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r && r != -ERESTARTSYS) DRM_ERROR("Failed to process the buffer list %d!\n", r); else if (!r) { reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, parser); + r = amdgpu_cs_ib_fill(adev, &parser); } if (!r) { - r = amdgpu_cs_dependencies(adev, parser); + r = amdgpu_cs_dependencies(adev, &parser); if (r) DRM_ERROR("Failed in the dependencies handling %d!\n", r); } @@ -861,36 +837,38 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; - for (i = 0; i < parser->num_ibs; i++) - trace_amdgpu_cs(parser, i); + for (i = 0; i < parser.num_ibs; i++) + trace_amdgpu_cs(&parser, i); - r = amdgpu_cs_ib_vm_chunk(adev, parser); + r = amdgpu_cs_ib_vm_chunk(adev, &parser); if (r) goto out; - if (amdgpu_enable_scheduler && parser->num_ibs) { + if (amdgpu_enable_scheduler && parser.num_ibs) { struct amdgpu_job *job; - struct amdgpu_ring * ring = parser->ibs->ring; + struct amdgpu_ring * ring = parser.ibs->ring; + job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) { r = -ENOMEM; goto out; } + job->base.sched = &ring->sched; - job->base.s_entity = &parser->ctx->rings[ring->idx].entity; - job->adev = parser->adev; - job->ibs = parser->ibs; - job->num_ibs = parser->num_ibs; - job->base.owner = parser->filp; + job->base.s_entity = &parser.ctx->rings[ring->idx].entity; + job->adev = parser.adev; + job->ibs = parser.ibs; + job->num_ibs = parser.num_ibs; + job->base.owner = parser.filp; mutex_init(&job->job_lock); if (job->ibs[job->num_ibs - 1].user) { - job->uf = parser->uf; + job->uf = parser.uf; job->ibs[job->num_ibs - 1].user = &job->uf; - parser->uf.bo = NULL; + parser.uf.bo = NULL; } - parser->ibs = NULL; - parser->num_ibs = 0; + parser.ibs = NULL; + parser.num_ibs = 0; job->free_job = amdgpu_cs_free_job; mutex_lock(&job->job_lock); @@ -902,24 +880,24 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } cs->out.handle = - amdgpu_ctx_add_fence(parser->ctx, ring, + amdgpu_ctx_add_fence(parser.ctx, ring, &job->base.s_fence->base); job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - list_sort(NULL, &parser->validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser->ticket, - &parser->validated, + list_sort(NULL, &parser.validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser.ticket, + &parser.validated, &job->base.s_fence->base); mutex_unlock(&job->job_lock); - amdgpu_cs_parser_fini_late(parser); + amdgpu_cs_parser_fini_late(&parser); mutex_unlock(&vm->mutex); return 0; } - cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; + cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; out: - amdgpu_cs_parser_fini(parser, r, reserved_buffers); + amdgpu_cs_parser_fini(&parser, r, reserved_buffers); mutex_unlock(&vm->mutex); r = amdgpu_cs_handle_lockup(adev, r); return r; From 24dd2f64c5a877392925202321c7c2c46c2b0ddf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Nov 2015 13:01:35 -0500 Subject: [PATCH 064/291] drm/radeon: unconditionally set sysfs_initialized Avoids spew on resume for systems where sysfs may fail even on init. bug: https://bugzilla.kernel.org/show_bug.cgi?id=106851 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 6d80dde23400..f4f03dcc1530 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1542,8 +1542,7 @@ int radeon_pm_late_init(struct radeon_device *rdev) ret = device_create_file(rdev->dev, &dev_attr_power_method); if (ret) DRM_ERROR("failed to create device file for power method\n"); - if (!ret) - rdev->pm.sysfs_initialized = true; + rdev->pm.sysfs_initialized = true; } mutex_lock(&rdev->pm.mutex); From a80b30476d240482d360a25a1b2e8c13036ed750 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 5 Nov 2015 13:06:15 -0600 Subject: [PATCH 065/291] drm/amdgpu: Fix default page access routing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VM default page (used when a VM translation fails) is allocated in system memory. The VM is misconfigured to interpret the physical address as referencing a VRAM physical page. Route default page accesses to system memory. Reviewed-by: Christian König Signed-off-by: Jay Cornwall Cc: # v4.2+ Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 85bbcdc73fff..b93b649d9e36 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); WREG32(mmVM_L2_CNTL, tmp); tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1bcc4e74e3b4..c9209b45d901 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); WREG32(mmVM_L2_CNTL, tmp); tmp = RREG32(mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); From 515c752dabee9945c1e8686c87f7cdeb3935eea4 Mon Sep 17 00:00:00 2001 From: Maxim Sheviakov Date: Tue, 10 Nov 2015 13:09:13 -0500 Subject: [PATCH 066/291] drm/radeon: fix quirk for MSI R7 370 Armor 2X There was a typo in the original. bug: https://bugs.freedesktop.org/show_bug.cgi?id=92865 Signed-off-by: Maxim Sheviakov Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index e72bf46042e0..a82b891ae1fe 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2927,7 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, - { PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, { 0, 0, 0, 0 }, }; From e1b35f6103b37e0d81184b32906b7010170dda02 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Nov 2015 13:17:55 +0100 Subject: [PATCH 067/291] drm/amdgpu: fix seq_printf format string The amdgpu driver has a debugfs interface that shows the amount of VRAM in use, but the newly added code causes a build error on all 32-bit architectures: drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:1076:17: warning: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'long long int' [-Wformat=] This fixes the format string to use "%llu" for printing 64-bit numbers, which works everywhere, as long as we also cast to 'u64'. Unlike atomic64_t, u64 is defined as 'unsigned long long' on all architectures. Signed-off-by: Arnd Bergmann Fixes: a2ef8a974931 ("drm/amdgpu: add vram usage into debugfs") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 81bb8e9fc26d..d4bac5f49939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1073,10 +1073,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) ret = drm_mm_dump_table(m, mm); spin_unlock(&glob->lru_lock); if (ttm_pl == TTM_PL_VRAM) - seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n", + seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", adev->mman.bdev.man[ttm_pl].size, - atomic64_read(&adev->vram_usage) >> 20, - atomic64_read(&adev->vram_vis_usage) >> 20); + (u64)atomic64_read(&adev->vram_usage) >> 20, + (u64)atomic64_read(&adev->vram_vis_usage) >> 20); return ret; } From c305fd5fffae5b826b9f8f7ef32fe72ee83354fc Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 13 Oct 2015 13:57:52 +0800 Subject: [PATCH 068/291] drm/amdgpu: fix bug that can't enter thermal interrupt for bonaire. Set reversed bit to enable/disable thermal interrupt. Signed-off-by: Rex Zhu Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index a1a35a5df8e7..57a2e347f04d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; + cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); break; default: @@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; + cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); - cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; + cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); break; default: From 5f2e816b2957fdde3c7d51d55db6751a980f4bea Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 5 Nov 2015 09:50:21 +0800 Subject: [PATCH 069/291] drm/amdgpu: update Fiji's tiling mode table Change-Id: I925c15015390113f7e27746ec5751eaa6a92c2a7 Signed-off-by: Flora Cui Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 292 +++++++++++++++++++++++++- 1 file changed, 291 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 3de6a8894e2d..e1dcab98e249 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -995,7 +995,7 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_cu_per_sh = 16; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 4; - adev->gfx.config.max_texture_channel_caches = 8; + adev->gfx.config.max_texture_channel_caches = 16; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; adev->gfx.config.max_hw_contexts = 8; @@ -1608,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); } case CHIP_FIJI: + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 1: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 2: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 3: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 4: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 5: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 6: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 7: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + break; + case 8: + gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); + break; + case 9: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 10: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 11: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + case 12: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + case 13: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 14: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 15: + gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 16: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + case 17: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + case 18: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 19: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 20: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 21: + gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 22: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 23: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 24: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 25: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 26: + gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + break; + case 27: + gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 28: + gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + break; + case 29: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + case 30: + gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + break; + default: + gb_tile_moden = 0; + break; + } + adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; + WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); + } + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { + switch (reg_offset) { + case 0: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 1: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 2: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 3: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 4: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 5: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 6: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 8: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 9: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 10: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 11: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 12: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 13: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + break; + case 14: + gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + break; + case 7: + /* unused idx */ + continue; + default: + gb_tile_moden = 0; + break; + } + adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); + } + break; case CHIP_TONGA: for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { switch (reg_offset) { From 7034decf6a5b1ff778d83ff9d7ce1f0b404804e4 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 11 Nov 2015 14:56:00 +0800 Subject: [PATCH 070/291] drm/amdgpu: add command submission workflow tracepoint OGL needs these tracepoints to investigate performance issue. Change-Id: I5e58187d061253f7d665dfce8e4e163ba91d3e2b Signed-off-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 51 +++++++++++++++++++ .../gpu/drm/amd/scheduler/gpu_sched_trace.h | 24 +++++++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + 5 files changed, 76 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index bf32096b8eb7..2ae73d5232dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -888,7 +888,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated, &job->base.s_fence->base); - + trace_amdgpu_cs_ioctl(job); mutex_unlock(&job->job_lock); amdgpu_cs_parser_fini_late(&parser); mutex_unlock(&vm->mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index dcf4a8aca680..67f778f6eedb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -26,6 +26,7 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) { @@ -45,6 +46,7 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) } job = to_amdgpu_job(sched_job); mutex_lock(&job->job_lock); + trace_amdgpu_sched_run_job(job); r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 26e2d50a6f64..8f9834ab1bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs, __entry->fences) ); +TRACE_EVENT(amdgpu_cs_ioctl, + TP_PROTO(struct amdgpu_job *job), + TP_ARGS(job), + TP_STRUCT__entry( + __field(struct amdgpu_device *, adev) + __field(struct amd_sched_job *, sched_job) + __field(struct amdgpu_ib *, ib) + __field(struct fence *, fence) + __field(char *, ring_name) + __field(u32, num_ibs) + ), + + TP_fast_assign( + __entry->adev = job->adev; + __entry->sched_job = &job->base; + __entry->ib = job->ibs; + __entry->fence = &job->base.s_fence->base; + __entry->ring_name = job->ibs[0].ring->name; + __entry->num_ibs = job->num_ibs; + ), + TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", + __entry->adev, __entry->sched_job, __entry->ib, + __entry->fence, __entry->ring_name, __entry->num_ibs) +); + +TRACE_EVENT(amdgpu_sched_run_job, + TP_PROTO(struct amdgpu_job *job), + TP_ARGS(job), + TP_STRUCT__entry( + __field(struct amdgpu_device *, adev) + __field(struct amd_sched_job *, sched_job) + __field(struct amdgpu_ib *, ib) + __field(struct fence *, fence) + __field(char *, ring_name) + __field(u32, num_ibs) + ), + + TP_fast_assign( + __entry->adev = job->adev; + __entry->sched_job = &job->base; + __entry->ib = job->ibs; + __entry->fence = &job->base.s_fence->base; + __entry->ring_name = job->ibs[0].ring->name; + __entry->num_ibs = job->num_ibs; + ), + TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", + __entry->adev, __entry->sched_job, __entry->ib, + __entry->fence, __entry->ring_name, __entry->num_ibs) +); + + TRACE_EVENT(amdgpu_vm_grab_id, TP_PROTO(unsigned vmid, int ring), TP_ARGS(vmid, ring), diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index 144f50acc971..c89dc777768f 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job, TP_ARGS(sched_job), TP_STRUCT__entry( __field(struct amd_sched_entity *, entity) + __field(struct amd_sched_job *, sched_job) + __field(struct fence *, fence) __field(const char *, name) __field(u32, job_count) __field(int, hw_job_count) @@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job, TP_fast_assign( __entry->entity = sched_job->s_entity; + __entry->sched_job = sched_job; + __entry->fence = &sched_job->s_fence->base; __entry->name = sched_job->sched->name; __entry->job_count = kfifo_len( &sched_job->s_entity->job_queue) / sizeof(sched_job); __entry->hw_job_count = atomic_read( &sched_job->sched->hw_rq_count); ), - TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->name, __entry->job_count, - __entry->hw_job_count) + TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->sched_job, __entry->fence, __entry->name, + __entry->job_count, __entry->hw_job_count) ); + +TRACE_EVENT(amd_sched_process_job, + TP_PROTO(struct amd_sched_fence *fence), + TP_ARGS(fence), + TP_STRUCT__entry( + __field(struct fence *, fence) + ), + + TP_fast_assign( + __entry->fence = &fence->base; + ), + TP_printk("fence=%p signaled", __entry->fence) +); + #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index fe5b3c415f18..b8925fea577c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -346,6 +346,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) list_del_init(&s_fence->list); spin_unlock_irqrestore(&sched->fence_list_lock, flags); } + trace_amd_sched_process_job(s_fence); fence_put(&s_fence->base); wake_up_interruptible(&sched->wake_up_worker); } From 4a562283376197722b295d27633134401bbc80f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 6 Nov 2015 14:09:21 +0100 Subject: [PATCH 071/291] drm/amdgpu: cleanup scheduler fence get/put dance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code was correct, but getting two references when the ownership is linearly moved on is a bit awkward and just overhead. Signed: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 1 - drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 67f778f6eedb..8ef9e4415fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -64,7 +64,6 @@ err: job->free_job(job); mutex_unlock(&job->job_lock); - fence_put(&job->base.s_fence->base); kfree(job); return fence ? &fence->base : NULL; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index b8925fea577c..ccb7c1554f5e 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -285,7 +285,6 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job) if (!fence) return -ENOMEM; - fence_get(&fence->base); sched_job->s_fence = fence; wait_event(entity->sched->job_scheduled, From e284022163716ecf11c37fd1057c35d689ef2c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Nov 2015 19:49:48 +0100 Subject: [PATCH 072/291] drm/amdgpu: fix incorrect mutex usage v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this patch the scheduler fence was created when we push the job into the queue, so we could only get the fence after pushing it. The mutex now was necessary to prevent the thread pushing the jobs to the hardware from running faster than the thread pushing the jobs into the queue. Otherwise the thread pushing jobs into the queue would have accessed possible freed up memory when it tries to get a reference to the fence. So what you get in the end is thread A: mutex_lock(&job->lock); ... Kick of thread B. ... mutex_unlock(&job->lock); And thread B: mutex_lock(&job->lock); .... mutex_unlock(&job->lock); kfree(job); I'm actually not sure if I'm still up to date on this, but this usage pattern used to be not allowed with mutexes. See here as well https://lwn.net/Articles/575460/. v2: remove unrelated changes, fix missing owner v3: rebased, add more commit message Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 43 +++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 27 +++++------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 +---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 +- 5 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7b02e3455172..0f187027c753 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1225,7 +1225,7 @@ struct amdgpu_job { struct amdgpu_device *adev; struct amdgpu_ib *ibs; uint32_t num_ibs; - struct mutex job_lock; + void *owner; struct amdgpu_user_fence uf; int (*free_job)(struct amdgpu_job *job); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2ae73d5232dd..44cf977ae4f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -845,8 +845,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; if (amdgpu_enable_scheduler && parser.num_ibs) { - struct amdgpu_job *job; struct amdgpu_ring * ring = parser.ibs->ring; + struct amd_sched_fence *fence; + struct amdgpu_job *job; job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); if (!job) { @@ -859,37 +860,41 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job->adev = parser.adev; job->ibs = parser.ibs; job->num_ibs = parser.num_ibs; - job->base.owner = parser.filp; - mutex_init(&job->job_lock); + job->owner = parser.filp; + job->free_job = amdgpu_cs_free_job; + if (job->ibs[job->num_ibs - 1].user) { job->uf = parser.uf; job->ibs[job->num_ibs - 1].user = &job->uf; parser.uf.bo = NULL; } - parser.ibs = NULL; - parser.num_ibs = 0; - - job->free_job = amdgpu_cs_free_job; - mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job(&job->base); - if (r) { - mutex_unlock(&job->job_lock); + fence = amd_sched_fence_create(job->base.s_entity, + parser.filp); + if (!fence) { + r = -ENOMEM; amdgpu_cs_free_job(job); kfree(job); goto out; } - cs->out.handle = - amdgpu_ctx_add_fence(parser.ctx, ring, - &job->base.s_fence->base); + job->base.s_fence = fence; + fence_get(&fence->base); + + cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, + &fence->base); job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - list_sort(NULL, &parser.validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser.ticket, - &parser.validated, - &job->base.s_fence->base); + parser.ibs = NULL; + parser.num_ibs = 0; + trace_amdgpu_cs_ioctl(job); - mutex_unlock(&job->job_lock); + amd_sched_entity_push_job(&job->base); + + list_sort(NULL, &parser.validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated, + &fence->base); + fence_put(&fence->base); + amdgpu_cs_parser_fini_late(&parser); mutex_unlock(&vm->mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 8ef9e4415fcc..438c05254695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -45,12 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); - mutex_lock(&job->job_lock); trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->adev, - job->num_ibs, - job->ibs, - job->base.owner); + r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; @@ -63,7 +59,6 @@ err: if (job->free_job) job->free_job(job); - mutex_unlock(&job->job_lock); kfree(job); return fence ? &fence->base : NULL; } @@ -89,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; job->base.sched = &ring->sched; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; + job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); + if (!job->base.s_fence) { + kfree(job); + return -ENOMEM; + } + *f = fence_get(&job->base.s_fence->base); + job->adev = adev; job->ibs = ibs; job->num_ibs = num_ibs; - job->base.owner = owner; - mutex_init(&job->job_lock); + job->owner = owner; job->free_job = free_job; - mutex_lock(&job->job_lock); - r = amd_sched_entity_push_job(&job->base); - if (r) { - mutex_unlock(&job->job_lock); - kfree(job); - return r; - } - *f = fence_get(&job->base.s_fence->base); - mutex_unlock(&job->job_lock); + amd_sched_entity_push_job(&job->base); } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); if (r) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ccb7c1554f5e..ea30d6ad4c13 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -276,21 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) * * Returns 0 for success, negative error code otherwise. */ -int amd_sched_entity_push_job(struct amd_sched_job *sched_job) +void amd_sched_entity_push_job(struct amd_sched_job *sched_job) { struct amd_sched_entity *entity = sched_job->s_entity; - struct amd_sched_fence *fence = amd_sched_fence_create( - entity, sched_job->owner); - - if (!fence) - return -ENOMEM; - - sched_job->s_fence = fence; wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); trace_amd_sched_job(sched_job); - return 0; } /** diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 4d05ca6fb057..939692b14f4b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -79,7 +79,6 @@ struct amd_sched_job { struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; - void *owner; }; extern const struct fence_ops amd_sched_fence_ops; @@ -131,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs); void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); -int amd_sched_entity_push_job(struct amd_sched_job *sched_job); +void amd_sched_entity_push_job(struct amd_sched_job *sched_job); struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity, void *owner); From 5d82730af746abca2aa74e00de6370d338df7e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 13 Nov 2015 13:04:50 +0100 Subject: [PATCH 073/291] drm/amdgpu: fix handling order in scheduler CS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to clear parser.ibs and num_ibs before amd_sched_fence_create, otherwise the IB could be freed twice if fence creates fails. Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 44cf977ae4f6..6096effd6a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -858,11 +858,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) job->base.sched = &ring->sched; job->base.s_entity = &parser.ctx->rings[ring->idx].entity; job->adev = parser.adev; - job->ibs = parser.ibs; - job->num_ibs = parser.num_ibs; job->owner = parser.filp; job->free_job = amdgpu_cs_free_job; + job->ibs = parser.ibs; + job->num_ibs = parser.num_ibs; + parser.ibs = NULL; + parser.num_ibs = 0; + if (job->ibs[job->num_ibs - 1].user) { job->uf = parser.uf; job->ibs[job->num_ibs - 1].user = &job->uf; @@ -884,9 +887,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) &fence->base); job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - parser.ibs = NULL; - parser.num_ibs = 0; - trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); From 43c27fb5c21eb238ce5ffc7766bc59ab773bfd7c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 12 Nov 2015 15:33:09 +0800 Subject: [PATCH 074/291] drm/amdgpu: update pd while updating vm as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I93a861cd6707f7d91672b9e19757cc50008cd7a2 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 087332858853..16dca46314bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -483,6 +483,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (domain == AMDGPU_GEM_DOMAIN_CPU) goto error_unreserve; } + r = amdgpu_vm_update_page_directory(adev, bo_va->vm); + if (r) + goto error_unreserve; r = amdgpu_vm_clear_freed(adev, bo_va->vm); if (r) From b4c580a43d520b7812c0fd064fbab929ce2f1da0 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 21 Oct 2015 14:37:04 +0300 Subject: [PATCH 075/291] usb: dwc3: pci: add support for Intel Broxton SOC PCI IDs for Broxton based platforms. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 77a622cb48ab..009d83048c8c 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -34,6 +34,8 @@ #define PCI_DEVICE_ID_INTEL_BSW 0x22b7 #define PCI_DEVICE_ID_INTEL_SPTLP 0x9d30 #define PCI_DEVICE_ID_INTEL_SPTH 0xa130 +#define PCI_DEVICE_ID_INTEL_BXT 0x0aaa +#define PCI_DEVICE_ID_INTEL_APL 0x5aaa static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; static const struct acpi_gpio_params cs_gpios = { 1, 0, false }; @@ -210,6 +212,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTLP), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTH), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; From 705e63d2b29c8bbf091119084544d353bda70393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 23 Oct 2015 09:53:50 +0200 Subject: [PATCH 076/291] usb: musb: core: fix order of arguments to ulpi write callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a bit of a mess in the order of arguments to the ulpi write callback. There is int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val) in drivers/usb/common/ulpi.c; struct usb_phy_io_ops { ... int (*write)(struct usb_phy *x, u32 val, u32 reg); } in include/linux/usb/phy.h. The callback registered by the musb driver has to comply to the latter, but up to now had "offset" first which effectively made the function broken for correct users. So flip the order and while at it also switch to the parameter names of struct usb_phy_io_ops's write. Fixes: ffb865b1e460 ("usb: musb: add ulpi access operations") Signed-off-by: Uwe Kleine-König Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index ba13529cbd52..3de9087ecad4 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev) /*-------------------------------------------------------------------------*/ #ifndef CONFIG_BLACKFIN -static int musb_ulpi_read(struct usb_phy *phy, u32 offset) +static int musb_ulpi_read(struct usb_phy *phy, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM. */ - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR); @@ -176,7 +176,7 @@ out: return ret; } -static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) +static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) power &= ~MUSB_POWER_SUSPENDM; musb_writeb(addr, MUSB_POWER, power); - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); - musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); + musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ); while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL) From 2c2025b41aeff57963f9ae2dd909fea704c625ab Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 27 Oct 2015 23:26:33 +0200 Subject: [PATCH 077/291] usb: phy: omap-otg: fix uninitialized pointer otg_dev->extcon was referenced before otg_dev was initialized. Fix. Cc: # v4.3 Fixes: a2fd2423240f ("usb: phy: omap-otg: Replace deprecated API of extcon") Reviewed-by: Chanwoo Choi Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-omap-otg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c index 1270906ccb95..c4bf2de6d14e 100644 --- a/drivers/usb/phy/phy-omap-otg.c +++ b/drivers/usb/phy/phy-omap-otg.c @@ -105,7 +105,6 @@ static int omap_otg_probe(struct platform_device *pdev) extcon = extcon_get_extcon_dev(config->extcon); if (!extcon) return -EPROBE_DEFER; - otg_dev->extcon = extcon; otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL); if (!otg_dev) @@ -115,6 +114,7 @@ static int omap_otg_probe(struct platform_device *pdev) if (IS_ERR(otg_dev->base)) return PTR_ERR(otg_dev->base); + otg_dev->extcon = extcon; otg_dev->id_nb.notifier_call = omap_otg_id_notifier; otg_dev->vbus_nb.notifier_call = omap_otg_vbus_notifier; From 0ba58de231066e47de87ccc4d61c5e396fe9bd27 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 27 Aug 2015 22:06:52 +0200 Subject: [PATCH 078/291] drivers: sh: Get rid of CONFIG_ARCH_SHMOBILE_MULTI Shmobile is all multiplatform these days, so get rid of the reference to CONFIG_ARCH_SHMOBILE_MULTI. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- drivers/sh/pm_runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index 25abd4eb7d10..91a003011acf 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -34,7 +34,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = { static int __init sh_pm_runtime_init(void) { - if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) { + if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) { if (!of_find_compatible_node(NULL, NULL, "renesas,cpg-mstp-clocks")) return 0; From 59536da34513c594af2a6fd35ba65ea45b6960a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 16 Nov 2015 13:15:46 +0100 Subject: [PATCH 079/291] USB: qcserial: Fix support for HP lt4112 LTE/HSPA+ Gobi 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DEVICE_HWI type was added under the faulty assumption that Huawei devices based on Qualcomm chipsets and firmware use the static USB interface numbering known from Gobi devices. But this model does not apply to Huawei devices like the HP branded lt4112 (Huawei me906e). Huawei firmwares will dynamically assign interface numbers. Functions are renumbered when the firmware is reconfigured. Fix by changing the DEVICE_HWI type to use a simplified version of Huawei's subclass + protocol scheme: Blacklisting known network interface combinations and assuming the rest are serial. Reported-and-tested-by: Muri Nicanor Tested-by: Martin Hauke Cc: Fixes: e7181d005e84 ("USB: qcserial: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem") Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 54 ++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 0e46af0d0cb4..9919d2a9faf2 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -215,6 +215,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) int altsetting = -1; bool sendsetup = false; + /* we only support vendor specific functions */ + if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + goto done; + nintf = serial->dev->actconfig->desc.bNumInterfaces; dev_dbg(dev, "Num Interfaces = %d\n", nintf); ifnum = intf->desc.bInterfaceNumber; @@ -341,29 +345,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case QCSERIAL_HWI: /* - * Huawei layout: - * 0: AT-capable modem port - * 1: DM/DIAG - * 2: AT-capable modem port - * 3: CCID-compatible PCSC interface - * 4: QMI/net - * 5: NMEA + * Huawei devices map functions by subclass + protocol + * instead of interface numbers. The protocol identify + * a specific function, while the subclass indicate a + * specific firmware source + * + * This is a blacklist of functions known to be + * non-serial. The rest are assumed to be serial and + * will be handled by this driver */ - switch (ifnum) { - case 0: - case 2: - dev_dbg(dev, "Modem port found\n"); - break; - case 1: - dev_dbg(dev, "DM/DIAG interface found\n"); - break; - case 5: - dev_dbg(dev, "NMEA GPS interface found\n"); - break; - default: - /* don't claim any unsupported interface */ + switch (intf->desc.bInterfaceProtocol) { + /* QMI combined (qmi_wwan) */ + case 0x07: + case 0x37: + case 0x67: + /* QMI data (qmi_wwan) */ + case 0x08: + case 0x38: + case 0x68: + /* QMI control (qmi_wwan) */ + case 0x09: + case 0x39: + case 0x69: + /* NCM like (huawei_cdc_ncm) */ + case 0x16: + case 0x46: + case 0x76: altsetting = -1; break; + default: + dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n", + intf->desc.bInterfaceClass, + intf->desc.bInterfaceSubClass, + intf->desc.bInterfaceProtocol); } break; default: From ef83b6e8f40bb24b92ad73b5889732346e54a793 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 29 Sep 2015 15:48:11 -0400 Subject: [PATCH 080/291] ext2, ext4: warn when mounting with dax enabled Similar to XFS warn when mounting DAX while it is still considered under development. Also, aspects of the DAX implementation, for example synchronization against multiple faults and faults causing block allocation, depend on the correct implementation in the filesystem. The maturity of a given DAX implementation is filesystem specific. Cc: Cc: "Theodore Ts'o" Cc: Matthew Wilcox Cc: linux-ext4@vger.kernel.org Cc: Kirill A. Shutemov Reported-by: Dave Chinner Acked-by: Jan Kara Signed-off-by: Dan Williams --- fs/ext2/super.c | 2 ++ fs/ext4/super.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a71cea68420..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb) /* Fall through */ case Opt_dax: #ifdef CONFIG_FS_DAX + ext2_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(sbi->s_mount_opt, DAX); #else ext2_msg(sb, KERN_INFO, "dax option not supported"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 753f4e68b820..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif From dde7632ed02382e4bac2b57c66ee2285764f2cd7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 17:34:50 +0100 Subject: [PATCH 081/291] clocksource/fsl: Avoid harmless 64-bit warnings The ftm_clockevent_init passes the value of "~0UL" into a function that takes a 32-bit argument, which drops the upper 32 bits, as gcc warns about on ARM64: clocksource/fsl_ftm_timer.c: In function 'ftm_clockevent_init': clocksource/fsl_ftm_timer.c:206:13: warning: large integer implicitly truncated to unsigned type [-Woverflow] This was obviously unintended behavior, and is easily avoided by using '~0u' as the integer literal, because that is 32-bit wide on all architectures. Signed-off-by: Arnd Bergmann Cc: linux-arm-kernel@lists.infradead.org Cc: Xiubo Li Cc: Shawn Guo Cc: Sascha Hauer Cc: Stefan Agner Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/3990834.xnjhm37Grs@wuerfel Signed-off-by: Thomas Gleixner --- drivers/clocksource/fsl_ftm_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c index 10202f1fdfd7..517e1c7624d4 100644 --- a/drivers/clocksource/fsl_ftm_timer.c +++ b/drivers/clocksource/fsl_ftm_timer.c @@ -203,7 +203,7 @@ static int __init ftm_clockevent_init(unsigned long freq, int irq) int err; ftm_writel(0x00, priv->clkevt_base + FTM_CNTIN); - ftm_writel(~0UL, priv->clkevt_base + FTM_MOD); + ftm_writel(~0u, priv->clkevt_base + FTM_MOD); ftm_reset_counter(priv->clkevt_base); @@ -230,7 +230,7 @@ static int __init ftm_clocksource_init(unsigned long freq) int err; ftm_writel(0x00, priv->clksrc_base + FTM_CNTIN); - ftm_writel(~0UL, priv->clksrc_base + FTM_MOD); + ftm_writel(~0u, priv->clksrc_base + FTM_MOD); ftm_reset_counter(priv->clksrc_base); From 3da6d49e847128378c30292848125cc3e207e5f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 17:51:17 +0100 Subject: [PATCH 082/291] clocksource: Disallow drivers for ARCH_USES_GETTIMEOFFSET We can now select clocksource drivers like ti-32k and CONFIG_OF on ancient machines that still use gettimeoffset, and the combination results in a link error. arch/arm/kernel/built-in.o: In function `time_init': (.init.text+0xc28): undefined reference to `clocksource_probe' The reason for this is that the Makefile is hidden behind CONFIG_ARCH_USES_GETTIMEOFFSET, but the Kconfig file is not, and it has shown up just now because the ti-32k driver was added and can be selected using COMPILE_TEST on all platforms. This patch hides the Kconfig menu in CONFIG_ARCH_USES_GETTIMEOFFSET as well. Fixes: dfedaf105d60 "clocksource: ti-32k: make it depend on GENERIC_CLOCKSOURCE" Signed-off-by: Arnd Bergmann Cc: linux-arm-kernel@lists.infradead.org Cc: Felipe Balbi Cc: Tony Lindgren Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/7579471.4N90fYPQOK@wuerfel Signed-off-by: Thomas Gleixner --- drivers/clocksource/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 71cfdf7c9708..2eb5f0efae90 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -1,4 +1,5 @@ menu "Clock Source drivers" + depends on !ARCH_USES_GETTIMEOFFSET config CLKSRC_OF bool From 96dd922c198286681fbbc15100e196e0f629e2fb Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 11 Nov 2015 15:36:16 -0800 Subject: [PATCH 083/291] mtd: jz4740_nand: fix build on jz4740 after removing gpio.h Fallout from commit 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h") We see errors like this: drivers/mtd/nand/jz4740_nand.c: In function 'jz_nand_detect_bank': drivers/mtd/nand/jz4740_nand.c:340:9: error: 'JZ_GPIO_MEM_CS0' undeclared (first use in this function) drivers/mtd/nand/jz4740_nand.c:340:9: note: each undeclared identifier is reported only once for each function it appears in drivers/mtd/nand/jz4740_nand.c:359:2: error: implicit declaration of function 'jz_gpio_set_function' [-Werror=implicit-function-declaration] drivers/mtd/nand/jz4740_nand.c:359:29: error: 'JZ_GPIO_FUNC_MEM_CS0' undeclared (first use in this function) drivers/mtd/nand/jz4740_nand.c:399:29: error: 'JZ_GPIO_FUNC_NONE' undeclared (first use in this function) drivers/mtd/nand/jz4740_nand.c: In function 'jz_nand_probe': drivers/mtd/nand/jz4740_nand.c:528:13: error: 'JZ_GPIO_MEM_CS0' undeclared (first use in this function) drivers/mtd/nand/jz4740_nand.c: In function 'jz_nand_remove': drivers/mtd/nand/jz4740_nand.c:555:14: error: 'JZ_GPIO_MEM_CS0' undeclared (first use in this function) Patched similarly to: https://patchwork.linux-mips.org/patch/11089/ Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h") Signed-off-by: Brian Norris --- drivers/mtd/nand/jz4740_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c index dc4e8446f1ff..5a99a93ed025 100644 --- a/drivers/mtd/nand/jz4740_nand.c +++ b/drivers/mtd/nand/jz4740_nand.c @@ -25,6 +25,7 @@ #include +#include #include #define JZ_REG_NAND_CTRL 0x50 From 9ca641b0f02a3a1eedbc8c296e695326da9bbaf9 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 9 Nov 2015 16:37:28 -0800 Subject: [PATCH 084/291] mtd: nand: fix shutdown/reboot for multi-chip systems If multiple NAND chips are registered to the same controller, then when rebooting the system, the first one will grab the controller lock, while the second will wait forever for the first one to release it. i.e., a classic deadlock. This problem was solved for a similar case (suspend/resume) back in commit 6b0d9a841249 ("mtd: nand: fix multi-chip suspend problem"), and the shutdown state really isn't much different for us, so rather than adding a new special case to nand_get_device(), we can just overload the FL_PM_SUSPENDED state. Now, multiple chips can "get" the same controller lock (preventing further I/O), while we still allow other chips to pass through nand_shutdown(). Original report: http://thread.gmane.org/gmane.linux.drivers.mtd/59726 http://lists.infradead.org/pipermail/linux-mtd/2015-July/059992.html Fixes: 72ea403669c7 ("mtd: nand: added nand_shutdown") Reported-by: Andrew E. Mileski Signed-off-by: Brian Norris Cc: Scott Branden Cc: Andrew E. Mileski Acked-by: Scott Branden Reviewed-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index cc74142938b0..ece544efccc3 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3110,7 +3110,7 @@ static void nand_resume(struct mtd_info *mtd) */ static void nand_shutdown(struct mtd_info *mtd) { - nand_get_device(mtd, FL_SHUTDOWN); + nand_get_device(mtd, FL_PM_SUSPENDED); } /* Set default functions */ From 680513cc0accf14cb447663b65e02ae307ac3811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Sep 2015 15:03:50 +0200 Subject: [PATCH 085/291] drm/amdgpu: wait interruptible when semaphores are disabled v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise debugging locked up processes isn't possible. v2: rebased Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a6697fd05217..dd005c336c97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, return -EINVAL; } - if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || - (count >= AMDGPU_NUM_SYNCS)) { + if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) { + r = fence_wait(&fence->base, true); + if (r) + return r; + continue; + } + + if (count >= AMDGPU_NUM_SYNCS) { /* not enough room, wait manually */ r = fence_wait(&fence->base, false); if (r) From bbf0b34578a71f42d19bd6c966f09221044921fe Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Sun, 6 Sep 2015 14:00:46 +0800 Subject: [PATCH 086/291] drm/amdgpu: remove the unnecessary parameter adev for amdgpu_sa_bo_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Junwei Zhang Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 7 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 3902e7a9f8cc..9e25edafa721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, int r; if (size) { - r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, + r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, &ib->sa_bo, size, 256); if (r) { dev_err(adev->dev, "failed to get a new IB (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 3c2ff4567798..ea756e77b023 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); -int amdgpu_sa_bo_new(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager, - struct amdgpu_sa_bo **sa_bo, - unsigned size, unsigned align); +int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, + struct amdgpu_sa_bo **sa_bo, + unsigned size, unsigned align); void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, struct fence *fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 0212b31dc194..8b88edb0434b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, return false; } -int amdgpu_sa_bo_new(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager, +int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c index ff3ca52ec6fe..1caaf201b708 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c @@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev, if (*semaphore == NULL) { return -ENOMEM; } - r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, + r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, &(*semaphore)->sa_bo, 8, 8); if (r) { kfree(*semaphore); From 6f9b36cd2497660e254c67ed131f0b297ed8bf40 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Nov 2015 10:26:57 -0800 Subject: [PATCH 087/291] PM / wakeirq: check that wake IRQ is valid before accepting it Check that IRQ number passed to dev_pm_set_wake_irq() and dev_pm_set_dedicated_wake_irq() is valid (not negative) before accepting it. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index eb6e67451dec..0d77cd6fd8d1 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -68,6 +68,9 @@ int dev_pm_set_wake_irq(struct device *dev, int irq) struct wake_irq *wirq; int err; + if (irq < 0) + return -EINVAL; + wirq = kzalloc(sizeof(*wirq), GFP_KERNEL); if (!wirq) return -ENOMEM; @@ -167,6 +170,9 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) struct wake_irq *wirq; int err; + if (irq < 0) + return -EINVAL; + wirq = kzalloc(sizeof(*wirq), GFP_KERNEL); if (!wirq) return -ENOMEM; From 4ead1a25ce436f73501d6358450f3ba0c1cb2ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:35 +0100 Subject: [PATCH 088/291] MAINTAINERS: Add linux-block list to LightNVM for patches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e9caa4b28828..018865e364c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6371,6 +6371,7 @@ F: arch/*/include/asm/pmem.h LIGHTNVM PLATFORM SUPPORT M: Matias Bjorling W: http://github/OpenChannelSSD +L: linux-block@vger.kernel.org S: Maintained F: drivers/lightnvm/ F: include/linux/lightnvm.h From aedf17f4515b12ba1cd73298e66baa69cf93010e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:36 +0100 Subject: [PATCH 089/291] lightnvm: change max_phys_sect to uint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max_phys_sect variable is defined as a char. We do a boundary check to maximally allow 256 physical page descriptors per command. As we are not indexing from zero. This expression is always false. Bump the max_phys_sect to an unsigned int to support the range check. Signed-off-by: Matias Bjørling Reported-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 69c9057e1ab8..32b5369e814e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -220,7 +220,7 @@ struct nvm_dev_ops { nvm_dev_dma_alloc_fn *dev_dma_alloc; nvm_dev_dma_free_fn *dev_dma_free; - uint8_t max_phys_sect; + unsigned int max_phys_sect; }; struct nvm_lun { From 11450469830f2481a9e7cb181609288d40f41323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:37 +0100 Subject: [PATCH 090/291] lightnvm: update bad block table format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification was changed to reflect a multi-value bad block table. Instead of bit-based bad block table, the bad block table now allows eight bad block categories. Currently four are defined: * Factory bad blocks * Grown bad blocks * Device-side reserved blocks * Host-side reserved blocks The factory and grown bad blocks are the regular bad blocks. The reserved blocks are either for internal use or external use. In particular, the device-side reserved blocks allows the host to bootstrap from a limited number of flash blocks. Reducing the flash blocks to scan upon super block initialization. Support for both get bad block table and set bad block table is added. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/gennvm.c | 32 ++++++---- drivers/lightnvm/gennvm.h | 2 + drivers/nvme/host/lightnvm.c | 113 ++++++++++++++++++++++++++++------- include/linux/lightnvm.h | 6 +- 4 files changed, 117 insertions(+), 36 deletions(-) diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index ae1fb2bdc5f4..8cfc0114ff13 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -64,19 +64,22 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) return 0; } -static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks, +static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, void *private) { struct gen_nvm *gn = private; - struct gen_lun *lun = &gn->luns[lun_id]; + struct nvm_dev *dev = gn->dev; + struct gen_lun *lun; struct nvm_block *blk; int i; - if (unlikely(bitmap_empty(bb_bitmap, nr_blocks))) - return 0; + ppa = addr_to_generic_mode(gn->dev, ppa); + lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun]; + + for (i = 0; i < nr_blocks; i++) { + if (blks[i] == 0) + continue; - i = -1; - while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) { blk = &lun->vlun.blocks[i]; if (!blk) { pr_err("gennvm: BB data is out of bounds.\n"); @@ -171,8 +174,16 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) } if (dev->ops->get_bb_tbl) { - ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id, - dev->blks_per_lun, gennvm_block_bb, gn); + struct ppa_addr ppa; + + ppa.ppa = 0; + ppa.g.ch = lun->vlun.chnl_id; + ppa.g.lun = lun->vlun.id; + ppa = generic_to_addr_mode(dev, ppa); + + ret = dev->ops->get_bb_tbl(dev->q, ppa, + dev->blks_per_lun, + gennvm_block_bb, gn); if (ret) pr_err("gennvm: could not read BB table\n"); } @@ -199,6 +210,7 @@ static int gennvm_register(struct nvm_dev *dev) if (!gn) return -ENOMEM; + gn->dev = dev; gn->nr_luns = dev->nr_luns; dev->mp = gn; @@ -354,10 +366,10 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) { int i; - if (!dev->ops->set_bb) + if (!dev->ops->set_bb_tbl) return; - if (dev->ops->set_bb(dev->q, rqd, 1)) + if (dev->ops->set_bb_tbl(dev->q, rqd, 1)) return; gennvm_addr_to_generic_mode(dev, rqd); diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h index d23bd3501ddc..9c24b5b32dac 100644 --- a/drivers/lightnvm/gennvm.h +++ b/drivers/lightnvm/gennvm.h @@ -35,6 +35,8 @@ struct gen_lun { }; struct gen_nvm { + struct nvm_dev *dev; + int nr_luns; struct gen_lun *luns; }; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index e0b7b95813bc..2c3546516300 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -93,7 +93,7 @@ struct nvme_nvm_l2ptbl { __le16 cdw14[6]; }; -struct nvme_nvm_bbtbl { +struct nvme_nvm_getbbtbl { __u8 opcode; __u8 flags; __u16 command_id; @@ -101,10 +101,23 @@ struct nvme_nvm_bbtbl { __u64 rsvd[2]; __le64 prp1; __le64 prp2; - __le32 prp1_len; - __le32 prp2_len; - __le32 lbb; - __u32 rsvd11[3]; + __le64 spba; + __u32 rsvd4[4]; +}; + +struct nvme_nvm_setbbtbl { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd[2]; + __le64 prp1; + __le64 prp2; + __le64 spba; + __le16 nlb; + __u8 value; + __u8 rsvd3; + __u32 rsvd4[3]; }; struct nvme_nvm_erase_blk { @@ -129,8 +142,8 @@ struct nvme_nvm_command { struct nvme_nvm_hb_rw hb_rw; struct nvme_nvm_ph_rw ph_rw; struct nvme_nvm_l2ptbl l2p; - struct nvme_nvm_bbtbl get_bb; - struct nvme_nvm_bbtbl set_bb; + struct nvme_nvm_getbbtbl get_bb; + struct nvme_nvm_setbbtbl set_bb; struct nvme_nvm_erase_blk erase; }; }; @@ -187,6 +200,20 @@ struct nvme_nvm_id { struct nvme_nvm_id_group groups[4]; } __packed; +struct nvme_nvm_bb_tbl { + __u8 tblid[4]; + __le16 verid; + __le16 revid; + __le32 rvsd1; + __le32 tblks; + __le32 tfact; + __le32 tgrown; + __le32 tdresv; + __le32 thresv; + __le32 rsvd2[8]; + __u8 blk[0]; +}; + /* * Check we didn't inadvertently grow the command struct */ @@ -195,12 +222,14 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); + BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512); } static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) @@ -322,43 +351,80 @@ out: return ret; } -static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid, - unsigned int nr_blocks, - nvm_bb_update_fn *update_bbtbl, void *priv) +static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa, + int nr_blocks, nvm_bb_update_fn *update_bbtbl, + void *priv) { struct nvme_ns *ns = q->queuedata; struct nvme_dev *dev = ns->dev; struct nvme_nvm_command c = {}; - void *bb_bitmap; - u16 bb_bitmap_size; + struct nvme_nvm_bb_tbl *bb_tbl; + int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks; int ret = 0; c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; c.get_bb.nsid = cpu_to_le32(ns->ns_id); - c.get_bb.lbb = cpu_to_le32(lunid); - bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE; - bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL); - if (!bb_bitmap) + c.get_bb.spba = cpu_to_le64(ppa.ppa); + + bb_tbl = kzalloc(tblsz, GFP_KERNEL); + if (!bb_tbl) return -ENOMEM; - bitmap_zero(bb_bitmap, nr_blocks); - - ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap, - bb_bitmap_size); + ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_tbl, tblsz); if (ret) { dev_err(dev->dev, "get bad block table failed (%d)\n", ret); ret = -EIO; goto out; } - ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv); + if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || + bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { + dev_err(dev->dev, "bbt format mismatch\n"); + ret = -EINVAL; + goto out; + } + + if (le16_to_cpu(bb_tbl->verid) != 1) { + ret = -EINVAL; + dev_err(dev->dev, "bbt version not supported\n"); + goto out; + } + + if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) { + ret = -EINVAL; + dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)", + le32_to_cpu(bb_tbl->tblks), nr_blocks); + goto out; + } + + ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv); if (ret) { ret = -EINTR; goto out; } out: - kfree(bb_bitmap); + kfree(bb_tbl); + return ret; +} + +static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd, + int type) +{ + struct nvme_ns *ns = q->queuedata; + struct nvme_dev *dev = ns->dev; + struct nvme_nvm_command c = {}; + int ret = 0; + + c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; + c.set_bb.nsid = cpu_to_le32(ns->ns_id); + c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa); + c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1); + c.set_bb.value = type; + + ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); + if (ret) + dev_err(dev->dev, "set bad block table failed (%d)\n", ret); return ret; } @@ -474,6 +540,7 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .get_l2p_tbl = nvme_nvm_get_l2p_tbl, .get_bb_tbl = nvme_nvm_get_bb_tbl, + .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, .erase_block = nvme_nvm_erase_block, diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 32b5369e814e..9b3dc1bc9296 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -191,11 +191,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) struct nvm_block; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); -typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *); +typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int, +typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); @@ -210,7 +210,7 @@ struct nvm_dev_ops { nvm_id_fn *identity; nvm_get_l2p_tbl_fn *get_l2p_tbl; nvm_op_bb_tbl_fn *get_bb_tbl; - nvm_op_set_bb_fn *set_bb; + nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; nvm_erase_blk_fn *erase_block; From 36d5dbc694e0d1b1909506666d7c27b53f7f9674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:38 +0100 Subject: [PATCH 091/291] lightnvm: update alignments for identify command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single 8 bit and 16 bit reserve field were inserted in the specification to align fields appropriately. Reflect this in the identify group structure. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 2c3546516300..60687ed68b5d 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -155,11 +155,13 @@ struct nvme_nvm_id_group { __u8 num_ch; __u8 num_lun; __u8 num_pln; + __u8 rsvd1; __le16 num_blk; __le16 num_pg; __le16 fpg_sz; __le16 csecs; __le16 sos; + __le16 rsvd2; __le32 trdt; __le32 trdm; __le32 tprt; @@ -168,7 +170,7 @@ struct nvme_nvm_id_group { __le32 tbem; __le32 mpos; __le16 cpar; - __u8 reserved[913]; + __u8 reserved[910]; } __packed; struct nvme_nvm_addr_format { From 12be5edf68e785dd5dc8665db5a88152b49c1fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:39 +0100 Subject: [PATCH 092/291] lightnvm: expose mccap in identify command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mccap field is required for I/O command option support. It defines the following flash access modes: * SLC mode * Erase/Program Suspension * Scramble On/Off * Encryption It is slotted in between mpos and cpar, changing the offset for cpar as well. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 4 +++- include/linux/lightnvm.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 60687ed68b5d..52b311cf694c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -169,8 +169,9 @@ struct nvme_nvm_id_group { __le32 tbet; __le32 tbem; __le32 mpos; + __le32 mccap; __le16 cpar; - __u8 reserved[910]; + __u8 reserved[906]; } __packed; struct nvme_nvm_addr_format { @@ -265,6 +266,7 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) dst->tbet = le32_to_cpu(src->tbet); dst->tbem = le32_to_cpu(src->tbem); dst->mpos = le32_to_cpu(src->mpos); + dst->mccap = le32_to_cpu(src->mccap); dst->cpar = le16_to_cpu(src->cpar); } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 9b3dc1bc9296..2572856e2a89 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -74,6 +74,7 @@ struct nvm_id_group { u32 tbet; u32 tbem; u32 mpos; + u32 mccap; u16 cpar; u8 res[913]; } __packed; From 73387e7bed260c89628fc6a4e3632b45be9776b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:40 +0100 Subject: [PATCH 093/291] lightnvm: remove unused attrs in nvm_id structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_id, nvm_id_group and nvm_addr_format data structures contain reserved attributes. They are unused by media managers and targets. Remove them. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2572856e2a89..e6ef8aaf533f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -58,7 +58,6 @@ enum { struct nvm_id_group { u8 mtype; u8 fmtype; - u16 res16; u8 num_ch; u8 num_lun; u8 num_pln; @@ -76,8 +75,7 @@ struct nvm_id_group { u32 mpos; u32 mccap; u16 cpar; - u8 res[913]; -} __packed; +}; struct nvm_addr_format { u8 ch_offset; @@ -92,19 +90,16 @@ struct nvm_addr_format { u8 pg_len; u8 sect_offset; u8 sect_len; - u8 res[4]; }; struct nvm_id { u8 ver_id; u8 vmnt; u8 cgrps; - u8 res[5]; u32 cap; u32 dom; struct nvm_addr_format ppaf; u8 ppat; - u8 resv[224]; struct nvm_id_group groups[4]; } __packed; From 4264c980e3e9bb904b7f41dc9c64786cc5466bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:41 +0100 Subject: [PATCH 094/291] lightnvm: check for NAND flash and its type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only NAND flash with SLC and MLC is supported. Make sure to not try to initialize TLC memory or other non-volatile memory types. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index f659e605a406..0985a032debc 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -185,6 +185,16 @@ static int nvm_core_init(struct nvm_dev *dev) dev->plane_mode = NVM_PLANE_SINGLE; dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size; + if (grp->mtype != 0) { + pr_err("nvm: memory type not supported\n"); + return -EINVAL; + } + + if (grp->fmtype != 0 && grp->fmtype != 1) { + pr_err("nvm: flash type not supported\n"); + return -EINVAL; + } + if (grp->mpos & 0x020202) dev->plane_mode = NVM_PLANE_DOUBLE; if (grp->mpos & 0x040404) From edad2e6606ee62dd7dfc5b001fae39c5c8015a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:42 +0100 Subject: [PATCH 095/291] lightnvm: prematurely activate nvm_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We register with nvm_devices when there registration can still fail. Move the final registration at the end of the nvm_register function to make sure we are fully registered when added to the nvm_devices list. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 0985a032debc..40e6cfae4585 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -318,10 +318,6 @@ int nvm_register(struct request_queue *q, char *disk_name, if (ret) goto err_init; - down_write(&nvm_lock); - list_add(&dev->devices, &nvm_devices); - up_write(&nvm_lock); - if (dev->ops->max_phys_sect > 1) { dev->ppalist_pool = dev->ops->create_dma_pool(dev->q, "ppalist"); @@ -334,6 +330,10 @@ int nvm_register(struct request_queue *q, char *disk_name, return -EINVAL; } + down_write(&nvm_lock); + list_add(&dev->devices, &nvm_devices); + up_write(&nvm_lock); + return 0; err_init: kfree(dev); From c1480ad5943261e01a62eaa7132eab76f9c490e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:43 +0100 Subject: [PATCH 096/291] lightnvm: prevent double free on init error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the nvm_register and nvm_init does a kfree(dev) on error. Make sure to only free it once. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 40e6cfae4585..899f6b9a9f68 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -160,11 +160,6 @@ int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk) } EXPORT_SYMBOL(nvm_erase_blk); -static void nvm_core_free(struct nvm_dev *dev) -{ - kfree(dev); -} - static int nvm_core_init(struct nvm_dev *dev) { struct nvm_id *id = &dev->identity; @@ -223,8 +218,6 @@ static void nvm_free(struct nvm_dev *dev) if (dev->mt) dev->mt->unregister_mgr(dev); - - nvm_core_free(dev); } static int nvm_init(struct nvm_dev *dev) @@ -351,11 +344,12 @@ void nvm_unregister(char *disk_name) return; } - nvm_exit(dev); - down_write(&nvm_lock); list_del(&dev->devices); up_write(&nvm_lock); + + nvm_exit(dev); + kfree(dev); } EXPORT_SYMBOL(nvm_unregister); From 7386af270c72be65c7cb2ba4ad0d4e70dc373106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:44 +0100 Subject: [PATCH 097/291] lightnvm: remove linear and device addr modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linear and device specific address modes can be replaced with a simple offset and bit length conversion that is generic across all devices. This both simplifies the specification and removes the special case for qemu nvme, that previously relied on the linear address mapping. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 3 +- drivers/lightnvm/gennvm.c | 12 +-- drivers/lightnvm/rrpc.c | 32 +++++++- drivers/nvme/host/lightnvm.c | 3 +- include/linux/lightnvm.h | 154 ++++++++--------------------------- 5 files changed, 73 insertions(+), 131 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 899f6b9a9f68..790b1d7a8d43 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -174,8 +174,7 @@ static int nvm_core_init(struct nvm_dev *dev) dev->sec_size = grp->csecs; dev->oob_size = grp->sos; dev->sec_per_pg = grp->fpg_sz / grp->csecs; - dev->addr_mode = id->ppat; - dev->addr_format = id->ppaf; + memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format)); dev->plane_mode = NVM_PLANE_SINGLE; dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size; diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 8cfc0114ff13..c0d0eb2357a8 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -73,7 +73,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, struct nvm_block *blk; int i; - ppa = addr_to_generic_mode(gn->dev, ppa); + ppa = dev_to_generic_addr(gn->dev, ppa); lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun]; for (i = 0; i < nr_blocks; i++) { @@ -179,7 +179,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) ppa.ppa = 0; ppa.g.ch = lun->vlun.chnl_id; ppa.g.lun = lun->vlun.id; - ppa = generic_to_addr_mode(dev, ppa); + ppa = generic_to_dev_addr(dev, ppa); ret = dev->ops->get_bb_tbl(dev->q, ppa, dev->blks_per_lun, @@ -304,10 +304,10 @@ static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) if (rqd->nr_pages > 1) { for (i = 0; i < rqd->nr_pages; i++) - rqd->ppa_list[i] = addr_to_generic_mode(dev, + rqd->ppa_list[i] = dev_to_generic_addr(dev, rqd->ppa_list[i]); } else { - rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr); + rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr); } } @@ -317,10 +317,10 @@ static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) if (rqd->nr_pages > 1) { for (i = 0; i < rqd->nr_pages; i++) - rqd->ppa_list[i] = generic_to_addr_mode(dev, + rqd->ppa_list[i] = generic_to_dev_addr(dev, rqd->ppa_list[i]); } else { - rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr); + rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); } } diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 7ba64c87ba1c..75e59c3a3f96 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -123,12 +123,42 @@ static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) return blk->id * rrpc->dev->pgs_per_blk; } +static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + int secs, pgs, blks, luns; + sector_t ppa = r.ppa; + + l.ppa = 0; + + div_u64_rem(ppa, dev->sec_per_pg, &secs); + l.g.sec = secs; + + sector_div(ppa, dev->sec_per_pg); + div_u64_rem(ppa, dev->sec_per_blk, &pgs); + l.g.pg = pgs; + + sector_div(ppa, dev->pgs_per_blk); + div_u64_rem(ppa, dev->blks_per_lun, &blks); + l.g.blk = blks; + + sector_div(ppa, dev->blks_per_lun); + div_u64_rem(ppa, dev->luns_per_chnl, &luns); + l.g.lun = luns; + + sector_div(ppa, dev->luns_per_chnl); + l.g.ch = ppa; + + return l; +} + static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr) { struct ppa_addr paddr; paddr.ppa = addr; - return __linear_to_generic_addr(dev, paddr); + return linear_to_generic_addr(dev, paddr); } /* requires lun->lock taken */ diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 52b311cf694c..9069be811f82 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -198,8 +198,7 @@ struct nvme_nvm_id { __le32 cap; __le32 dom; struct nvme_nvm_addr_format ppaf; - __u8 ppat; - __u8 resv[223]; + __u8 resv[224]; struct nvme_nvm_id_group groups[4]; } __packed; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e6ef8aaf533f..cbe288acb1de 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -99,7 +99,6 @@ struct nvm_id { u32 cap; u32 dom; struct nvm_addr_format ppaf; - u8 ppat; struct nvm_id_group groups[4]; } __packed; @@ -119,39 +118,28 @@ struct nvm_tgt_instance { #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 -#define NVM_SEC_BITS (8) -#define NVM_PL_BITS (6) -#define NVM_PG_BITS (16) #define NVM_BLK_BITS (16) -#define NVM_LUN_BITS (10) +#define NVM_PG_BITS (16) +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (8) +#define NVM_LUN_BITS (8) #define NVM_CH_BITS (8) struct ppa_addr { + /* Generic structure for all addresses */ union { - /* Channel-based PPA format in nand 4x2x2x2x8x10 */ - struct { - u64 ch : 4; - u64 sec : 2; /* 4 sectors per page */ - u64 pl : 2; /* 4 planes per LUN */ - u64 lun : 2; /* 4 LUNs per channel */ - u64 pg : 8; /* 256 pages per block */ - u64 blk : 10;/* 1024 blocks per plane */ - u64 resved : 36; - } chnl; - - /* Generic structure for all addresses */ struct { + u64 blk : NVM_BLK_BITS; + u64 pg : NVM_PG_BITS; u64 sec : NVM_SEC_BITS; u64 pl : NVM_PL_BITS; - u64 pg : NVM_PG_BITS; - u64 blk : NVM_BLK_BITS; u64 lun : NVM_LUN_BITS; u64 ch : NVM_CH_BITS; } g; u64 ppa; }; -} __packed; +}; struct nvm_rq { struct nvm_tgt_instance *ins; @@ -259,8 +247,7 @@ struct nvm_dev { int blks_per_lun; int sec_size; int oob_size; - int addr_mode; - struct nvm_addr_format addr_format; + struct nvm_addr_format ppaf; /* Calculated/Cached values. These do not reflect the actual usable * blocks at run-time. @@ -286,118 +273,45 @@ struct nvm_dev { char name[DISK_NAME_LEN]; }; -/* fallback conversion */ -static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev, - struct ppa_addr r) +static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = r.g.sec + - r.g.pg * dev->sec_per_pg + - r.g.blk * (dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.lun * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->sec_per_pg) + - r.g.ch * (dev->blks_per_lun * - dev->pgs_per_blk * - dev->luns_per_chnl * - dev->sec_per_pg); + l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset; + l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset; + l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset; + l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset; + l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset; + l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset; return l; } -/* fallback conversion */ -static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) -{ - struct ppa_addr l; - int secs, pgs, blks, luns; - sector_t ppa = r.ppa; - - l.ppa = 0; - - div_u64_rem(ppa, dev->sec_per_pg, &secs); - l.g.sec = secs; - - sector_div(ppa, dev->sec_per_pg); - div_u64_rem(ppa, dev->sec_per_blk, &pgs); - l.g.pg = pgs; - - sector_div(ppa, dev->pgs_per_blk); - div_u64_rem(ppa, dev->blks_per_lun, &blks); - l.g.blk = blks; - - sector_div(ppa, dev->blks_per_lun); - div_u64_rem(ppa, dev->luns_per_chnl, &luns); - l.g.lun = luns; - - sector_div(ppa, dev->luns_per_chnl); - l.g.ch = ppa; - - return l; -} - -static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r) +static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) { struct ppa_addr l; - l.ppa = 0; - - l.chnl.sec = r.g.sec; - l.chnl.pl = r.g.pl; - l.chnl.pg = r.g.pg; - l.chnl.blk = r.g.blk; - l.chnl.lun = r.g.lun; - l.chnl.ch = r.g.ch; + /* + * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. + */ + l.g.blk = (r.ppa >> dev->ppaf.blk_offset) & + (((1 << dev->ppaf.blk_len) - 1)); + l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) & + (((1 << dev->ppaf.pg_len) - 1)); + l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) & + (((1 << dev->ppaf.sect_len) - 1)); + l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) & + (((1 << dev->ppaf.pln_len) - 1)); + l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) & + (((1 << dev->ppaf.lun_len) - 1)); + l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) & + (((1 << dev->ppaf.ch_len) - 1)); return l; } -static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r) -{ - struct ppa_addr l; - - l.ppa = 0; - - l.g.sec = r.chnl.sec; - l.g.pl = r.chnl.pl; - l.g.pg = r.chnl.pg; - l.g.blk = r.chnl.blk; - l.g.lun = r.chnl.lun; - l.g.ch = r.chnl.ch; - - return l; -} - -static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __linear_to_generic_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __chnl_to_generic_addr(gppa); - default: - BUG(); - } - return gppa; -} - -static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev, - struct ppa_addr gppa) -{ - switch (dev->addr_mode) { - case NVM_ADDRMODE_LINEAR: - return __generic_to_linear_addr(dev, gppa); - case NVM_ADDRMODE_CHANNEL: - return __generic_to_chnl_addr(gppa); - default: - BUG(); - } - return gppa; -} - static inline int ppa_empty(struct ppa_addr ppa_addr) { return (ppa_addr.ppa == ADDR_EMPTY); From 2393bd39c77f21488ac7e5337cac1523e9ebd2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:45 +0100 Subject: [PATCH 098/291] nvme: missing ppaf copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ppa format was not copied from the NVMe specific ppa format to the lightnvm specific ppa format. This led to the ppa format not being communicated to the layers above. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 9069be811f82..fd37123d26b8 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -300,6 +300,8 @@ static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id) nvm_id->cgrps = nvme_nvm_id->cgrps; nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); + memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, + sizeof(struct nvme_nvm_addr_format)); ret = init_grps(nvm_id, nvme_nvm_id); out: From dad1b00977f9dc8120b48f9711deb4aa6462a3a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 16 Nov 2015 15:34:46 +0100 Subject: [PATCH 099/291] nvme: remove reserved double word MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification was updated the remove the double word just after number of configuration groups and capabilities. Update the identify structure to reflect it. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index fd37123d26b8..7e82fe33d6f8 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -194,11 +194,11 @@ struct nvme_nvm_id { __u8 ver_id; __u8 vmnt; __u8 cgrps; - __u8 res[5]; + __u8 res; __le32 cap; __le32 dom; struct nvme_nvm_addr_format ppaf; - __u8 resv[224]; + __u8 resv[228]; struct nvme_nvm_id_group groups[4]; } __packed; From d09f9581b235a92abbe372531660b468fadabc17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 16 Nov 2015 15:34:47 +0100 Subject: [PATCH 100/291] lightnvm: cleanup queue before target removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents outstanding IOs to be sent for completion to target after the target has been removed. The flow is now: stop new IOs > cleanup queue > remove target. Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 790b1d7a8d43..8a556f3f36bb 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -460,11 +460,11 @@ static void nvm_remove_target(struct nvm_target *t) lockdep_assert_held(&nvm_lock); del_gendisk(tdisk); + blk_cleanup_queue(q); + if (tt->exit) tt->exit(tdisk->private_data); - blk_cleanup_queue(q); - put_disk(tdisk); list_del(&t->list); From 4736346bb47966254ee6d1fc50267a2609791cba Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 16 Nov 2015 23:30:00 +0800 Subject: [PATCH 101/291] elevator: use list_{first,prev,next}_entry To make the intention clearer, use list_{first,prev,next}_entry instead of list_entry. Signed-off-by: Geliang Tang Signed-off-by: Jens Axboe --- block/noop-iosched.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 3de89d4690f3..a163c487cf38 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -21,10 +21,10 @@ static void noop_merged_requests(struct request_queue *q, struct request *rq, static int noop_dispatch(struct request_queue *q, int force) { struct noop_data *nd = q->elevator->elevator_data; + struct request *rq; - if (!list_empty(&nd->queue)) { - struct request *rq; - rq = list_entry(nd->queue.next, struct request, queuelist); + rq = list_first_entry_or_null(&nd->queue, struct request, queuelist); + if (rq) { list_del_init(&rq->queuelist); elv_dispatch_sort(q, rq); return 1; @@ -46,7 +46,7 @@ noop_former_request(struct request_queue *q, struct request *rq) if (rq->queuelist.prev == &nd->queue) return NULL; - return list_entry(rq->queuelist.prev, struct request, queuelist); + return list_prev_entry(rq, queuelist); } static struct request * @@ -56,7 +56,7 @@ noop_latter_request(struct request_queue *q, struct request *rq) if (rq->queuelist.next == &nd->queue) return NULL; - return list_entry(rq->queuelist.next, struct request, queuelist); + return list_next_entry(rq, queuelist); } static int noop_init_queue(struct request_queue *q, struct elevator_type *e) From b2b7e00148a203e9934bbd17aebffae3f447ade7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Thu, 12 Nov 2015 20:25:10 +0100 Subject: [PATCH 102/291] null_blk: register as a LightNVM device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for registering as a LightNVM device. This allows us to evaluate the performance of the LightNVM subsystem. In /drivers/Makefile, LightNVM is moved above block device drivers to make sure that the LightNVM media managers have been initialized before drivers under /drivers/block are initialized. Signed-off-by: Matias Bjørling Fix by Jens Axboe to remove unneeded slab cache and the following memory leak. Signed-off-by: Jens Axboe --- Documentation/block/null_blk.txt | 3 + drivers/Makefile | 2 +- drivers/block/null_blk.c | 160 +++++++++++++++++++++++++++++-- 3 files changed, 158 insertions(+), 7 deletions(-) diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt index 2f6c6ff7161d..d8880ca30af4 100644 --- a/Documentation/block/null_blk.txt +++ b/Documentation/block/null_blk.txt @@ -70,3 +70,6 @@ use_per_node_hctx=[0/1]: Default: 0 parameter. 1: The multi-queue block layer is instantiated with a hardware dispatch queue for each CPU node in the system. + +use_lightnvm=[0/1]: Default: 0 + Register device with LightNVM. Requires blk-mq to be used. diff --git a/drivers/Makefile b/drivers/Makefile index 73d039156ea7..795d0ca714bf 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ obj-$(CONFIG_PARPORT) += parport/ +obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ @@ -70,7 +71,6 @@ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_SCSI) += scsi/ -obj-$(CONFIG_NVM) += lightnvm/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_TARGET_CORE) += target/ diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6255d1c4bba4..816525143a74 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -8,6 +8,7 @@ #include #include #include +#include struct nullb_cmd { struct list_head list; @@ -39,6 +40,7 @@ struct nullb { struct nullb_queue *queues; unsigned int nr_queues; + char disk_name[DISK_NAME_LEN]; }; static LIST_HEAD(nullb_list); @@ -119,6 +121,10 @@ static int nr_devices = 2; module_param(nr_devices, int, S_IRUGO); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); +static bool use_lightnvm; +module_param(use_lightnvm, bool, S_IRUGO); +MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device"); + static int irqmode = NULL_IRQ_SOFTIRQ; static int null_set_irqmode(const char *str, const struct kernel_param *kp) @@ -427,6 +433,8 @@ static void null_del_dev(struct nullb *nullb) { list_del_init(&nullb->list); + if (use_lightnvm) + nvm_unregister(nullb->disk->disk_name); del_gendisk(nullb->disk); blk_cleanup_queue(nullb->q); if (queue_mode == NULL_Q_MQ) @@ -436,6 +444,125 @@ static void null_del_dev(struct nullb *nullb) kfree(nullb); } +#ifdef CONFIG_NVM + +static void null_lnvm_end_io(struct request *rq, int error) +{ + struct nvm_rq *rqd = rq->end_io_data; + struct nvm_dev *dev = rqd->dev; + + dev->mt->end_io(rqd, error); + + blk_put_request(rq); +} + +static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd) +{ + struct request *rq; + struct bio *bio = rqd->bio; + + rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0); + if (IS_ERR(rq)) + return -ENOMEM; + + rq->cmd_type = REQ_TYPE_DRV_PRIV; + rq->__sector = bio->bi_iter.bi_sector; + rq->ioprio = bio_prio(bio); + + if (bio_has_data(bio)) + rq->nr_phys_segments = bio_phys_segments(q, bio); + + rq->__data_len = bio->bi_iter.bi_size; + rq->bio = rq->biotail = bio; + + rq->end_io_data = rqd; + + blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io); + + return 0; +} + +static int null_lnvm_id(struct request_queue *q, struct nvm_id *id) +{ + sector_t size = gb * 1024 * 1024 * 1024ULL; + struct nvm_id_group *grp; + + id->ver_id = 0x1; + id->vmnt = 0; + id->cgrps = 1; + id->cap = 0x3; + id->dom = 0x1; + id->ppat = NVM_ADDRMODE_LINEAR; + + do_div(size, bs); /* convert size to pages */ + grp = &id->groups[0]; + grp->mtype = 0; + grp->fmtype = 1; + grp->num_ch = 1; + grp->num_lun = 1; + grp->num_pln = 1; + grp->num_blk = size / 256; + grp->num_pg = 256; + grp->fpg_sz = bs; + grp->csecs = bs; + grp->trdt = 25000; + grp->trdm = 25000; + grp->tprt = 500000; + grp->tprm = 500000; + grp->tbet = 1500000; + grp->tbem = 1500000; + grp->mpos = 0x010101; /* single plane rwe */ + grp->cpar = hw_queue_depth; + + return 0; +} + +static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name) +{ + mempool_t *virtmem_pool; + + virtmem_pool = mempool_create_page_pool(64, 0); + if (!virtmem_pool) { + pr_err("null_blk: Unable to create virtual memory pool\n"); + return NULL; + } + + return virtmem_pool; +} + +static void null_lnvm_destroy_dma_pool(void *pool) +{ + mempool_destroy(pool); +} + +static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool, + gfp_t mem_flags, dma_addr_t *dma_handler) +{ + return mempool_alloc(pool, mem_flags); +} + +static void null_lnvm_dev_dma_free(void *pool, void *entry, + dma_addr_t dma_handler) +{ + mempool_free(entry, pool); +} + +static struct nvm_dev_ops null_lnvm_dev_ops = { + .identity = null_lnvm_id, + .submit_io = null_lnvm_submit_io, + + .create_dma_pool = null_lnvm_create_dma_pool, + .destroy_dma_pool = null_lnvm_destroy_dma_pool, + .dev_dma_alloc = null_lnvm_dev_dma_alloc, + .dev_dma_free = null_lnvm_dev_dma_free, + + /* Simulate nvme protocol restriction */ + .max_phys_sect = 64, +}; +#else +static struct nvm_dev_ops null_lnvm_dev_ops; +#endif /* CONFIG_NVM */ + static int null_open(struct block_device *bdev, fmode_t mode) { return 0; @@ -575,11 +702,6 @@ static int null_add_dev(void) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); - disk = nullb->disk = alloc_disk_node(1, home_node); - if (!disk) { - rv = -ENOMEM; - goto out_cleanup_blk_queue; - } mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); @@ -589,6 +711,21 @@ static int null_add_dev(void) blk_queue_logical_block_size(nullb->q, bs); blk_queue_physical_block_size(nullb->q, bs); + sprintf(nullb->disk_name, "nullb%d", nullb->index); + + if (use_lightnvm) { + rv = nvm_register(nullb->q, nullb->disk_name, + &null_lnvm_dev_ops); + if (rv) + goto out_cleanup_blk_queue; + goto done; + } + + disk = nullb->disk = alloc_disk_node(1, home_node); + if (!disk) { + rv = -ENOMEM; + goto out_cleanup_lightnvm; + } size = gb * 1024 * 1024 * 1024ULL; set_capacity(disk, size >> 9); @@ -598,10 +735,15 @@ static int null_add_dev(void) disk->fops = &null_fops; disk->private_data = nullb; disk->queue = nullb->q; - sprintf(disk->disk_name, "nullb%d", nullb->index); + strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + add_disk(disk); +done: return 0; +out_cleanup_lightnvm: + if (use_lightnvm) + nvm_unregister(nullb->disk_name); out_cleanup_blk_queue: blk_cleanup_queue(nullb->q); out_cleanup_tags: @@ -625,6 +767,12 @@ static int __init null_init(void) bs = PAGE_SIZE; } + if (use_lightnvm && queue_mode != NULL_Q_MQ) { + pr_warn("null_blk: LightNVM only supported for blk-mq\n"); + pr_warn("null_blk: defaults queue mode to blk-mq\n"); + queue_mode = NULL_Q_MQ; + } + if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { if (submit_queues < nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.", From add68d6aa9e2492e51707ca603ada5b26c626757 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 12 Nov 2015 18:05:37 +0000 Subject: [PATCH 103/291] ACPI / SMBus: Fix boot stalls / high CPU caused by reentrant code In the SBS initialisation, a reentrant call to wait_event_timeout() causes an intermittent boot stall of several minutes usually following the "Switching to clocksource tsc" message. Another symptom of this bug is high CPU usage from programs (Firefox, upowerd) querying the battery state. This is caused by: 1. drivers/acpi/sbshc.c wait_transaction_complete() calls wait_event_timeout(): if (wait_event_timeout(hc->wait, smb_check_done(hc), msecs_to_jiffies(timeout))) 2. ___wait_event sets task state to uninterruptible 3. ___wait_event calls the "condition" smb_check_done() 4. smb_check_done (sbshc.c) calls through to ec_read() in drivers/acpi/ec.c 5. ec_guard() is reached which calls wait_event_timeout() if (wait_event_timeout(ec->wait, ec_transaction_completed(ec), guard)) ie. wait_event_timeout() is being called again inside evaluation of the previous wait_event_timeout() condition 5. The EC IRQ handler calls wake_up() and wakes up the sleeping task in ec_guard() 6. The task is now in state running even though the wait "condition" is still being evaluated 7. The "condition" check returns false so ___wait_event calls schedule_timeout() 8. Since the task state is running, the scheduler immediately schedules it again 9. This loop usually repeats for around 250 seconds even though the original wait_event_timeout was only 1000ms. The timeout is incorrect because each call to schedule_timeout() usually returns immediately, taking less than 1ms, so the jiffies timeout counter is not decremented. The task is now stuck in a running state, and so is highly likely to be immediately rescheduled, which takes less than a jiffy. The loop will never exit if all schedule_timeout() calls take less than a jiffy. Fix this by replacing SMBus reads in the wait_event_timeout condition with checks of a boolean value that is updated by the EC query handler. Link: https://bugzilla.kernel.org/show_bug.cgi?id=107191 Link: https://lkml.org/lkml/2015/11/6/776 Signed-off-by: Chris Bainbridge Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sbshc.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index bf034f8b7c1a..e2900518cf7e 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -30,6 +30,7 @@ struct acpi_smb_hc { u8 query_bit; smbus_alarm_callback callback; void *context; + bool done; }; static int acpi_smbus_hc_add(struct acpi_device *device); @@ -100,27 +101,11 @@ static inline int smb_hc_write(struct acpi_smb_hc *hc, u8 address, u8 data) return ec_write(hc->offset + address, data); } -static inline int smb_check_done(struct acpi_smb_hc *hc) -{ - union acpi_smb_status status = {.raw = 0}; - smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw); - return status.fields.done && (status.fields.status == SMBUS_OK); -} - static int wait_transaction_complete(struct acpi_smb_hc *hc, int timeout) { - if (wait_event_timeout(hc->wait, smb_check_done(hc), - msecs_to_jiffies(timeout))) + if (wait_event_timeout(hc->wait, hc->done, msecs_to_jiffies(timeout))) return 0; - /* - * After the timeout happens, OS will try to check the status of SMbus. - * If the status is what OS expected, it will be regarded as the bogus - * timeout. - */ - if (smb_check_done(hc)) - return 0; - else - return -ETIME; + return -ETIME; } static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, @@ -135,6 +120,7 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, } mutex_lock(&hc->lock); + hc->done = false; if (macbook) udelay(5); if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp)) @@ -235,8 +221,10 @@ static int smbus_alarm(void *context) if (smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw)) return 0; /* Check if it is only a completion notify */ - if (status.fields.done) + if (status.fields.done && status.fields.status == SMBUS_OK) { + hc->done = true; wake_up(&hc->wait); + } if (!status.fields.alarm) return 0; mutex_lock(&hc->lock); From 1b2ff19e6a957b1ef0f365ad331b608af80e932e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Nov 2015 14:25:52 +0100 Subject: [PATCH 104/291] blk-flush: Queue through IO scheduler when flush not required Currently blk_insert_flush() just adds flush request to q->queue_head when flush is not required. That completely bypasses IO scheduler so e.g. CFQ can be idling waiting for new request to arrive and will idle through the whole window unnecessarily. Luckily this only happens in rare cases as usually checks in generic_make_request_checks() clear FLUSH and FUA flags early if they are not needed. When no flushing is actually required, we can easily fix the problem by properly queueing the request through the IO scheduler. Ideally IO scheduler should be also made aware of requests queued via blk_flush_queue_rq(). However inserting flush request through IO scheduler can have unwanted side-effects since due to flush batching delaying the flush request in IO scheduler will delay all flush requests possibly coming from other processes. So we keep adding the request directly to q->queue_head. Signed-off-by: Jan Kara Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 9c423e53324a..c81d56ec308f 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -422,7 +422,7 @@ void blk_insert_flush(struct request *rq) if (q->mq_ops) { blk_mq_insert_request(rq, false, false, true); } else - list_add_tail(&rq->queuelist, &q->queue_head); + q->elevator->type->ops.elevator_add_req_fn(q, rq); return; } From a76032e0abef027df83f7f053580aa297de54818 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 12 Nov 2015 19:24:47 +0000 Subject: [PATCH 105/291] Revert "ACPI / SBS: Add 5 us delay to fix SBS hangs on MacBook" Revert commit 3349fb64b292 (ACPI / SBS: Add 5 us delay to fix SBS hangs on MacBook), since the delay introduced by it is not necessary any more after commit add68d6aa9e2 (ACPI / SMBus: Fix boot stalls / high CPU caused by reentrant code). Signed-off-by: Chris Bainbridge [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sbshc.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index e2900518cf7e..2fa8304171e0 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include "sbshc.h" #define PREFIX "ACPI: " @@ -89,8 +88,6 @@ enum acpi_smb_offset { ACPI_SMB_ALARM_DATA = 0x26, /* 2 bytes alarm data */ }; -static bool macbook; - static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data) { return ec_read(hc->offset + address, data); @@ -121,8 +118,6 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, mutex_lock(&hc->lock); hc->done = false; - if (macbook) - udelay(5); if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp)) goto end; if (temp) { @@ -250,29 +245,12 @@ extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); -static int macbook_dmi_match(const struct dmi_system_id *d) -{ - pr_debug("Detected MacBook, enabling workaround\n"); - macbook = true; - return 0; -} - -static struct dmi_system_id acpi_smbus_dmi_table[] = { - { macbook_dmi_match, "Apple MacBook", { - DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") }, - }, - { }, -}; - static int acpi_smbus_hc_add(struct acpi_device *device) { int status; unsigned long long val; struct acpi_smb_hc *hc; - dmi_check_system(acpi_smbus_dmi_table); - if (!device) return -EINVAL; From 4981c2b7abfe92a7ad3c9888b8a1ada552d49406 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 22:42:27 +0100 Subject: [PATCH 106/291] ACPI-EC: Drop unnecessary check made before calling acpi_ec_delete_query() The acpi_ec_delete_query() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index f61a7c834540..b420fb46669d 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1103,7 +1103,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) } err_exit: - if (result && q) + if (result) acpi_ec_delete_query(q); if (data) *data = value; From 1285734c7a776b6195f8aaac6fa6acce26b5b97a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 16 Nov 2015 15:34:15 -0800 Subject: [PATCH 107/291] MAINTAINERS: brcmnand: Add Broadcom internal mailing-list The Broadcom NAND driver is used by many different groups at Broadcom now, so use the same mailing-list we use for other areas of the kernel. Signed-off-by: Florian Fainelli Signed-off-by: Brian Norris --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e9caa4b28828..da691aad7031 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2450,6 +2450,7 @@ F: drivers/firmware/broadcom/* BROADCOM STB NAND FLASH DRIVER M: Brian Norris L: linux-mtd@lists.infradead.org +L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/mtd/nand/brcmnand/ From 9e5b8a6e53c79e6ccdbe6e62142ed9f317cefd46 Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Mon, 9 Nov 2015 14:26:39 +0800 Subject: [PATCH 108/291] tools:testing/selftests: fix typo in futex/README Correct typo in tools/testing/selftests/futex/README. Signed-off-by: Wei Jiangang Acked-by: Darren Hart Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README index 3224a049b196..0558bb9ce0a6 100644 --- a/tools/testing/selftests/futex/README +++ b/tools/testing/selftests/futex/README @@ -27,7 +27,7 @@ o The build system shall remain as simple as possible, avoiding any archive or o Where possible, any helper functions or other package-wide code shall be implemented in header files, avoiding the need to compile intermediate object files. -o External dependendencies shall remain as minimal as possible. Currently gcc +o External dependencies shall remain as minimal as possible. Currently gcc and glibc are the only dependencies. o Tests return 0 for success and < 0 for failure. From ee82c9ed41e896bd47e121d87e4628de0f2656a3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 15 Nov 2015 16:06:32 -0800 Subject: [PATCH 109/291] dax: disable pmd mappings While dax pmd mappings are functional in the nominal path they trigger kernel crashes in the following paths: BUG: unable to handle kernel paging request at ffffea0004098000 IP: [] follow_trans_huge_pmd+0x117/0x3b0 [..] Call Trace: [] follow_page_mask+0x2d3/0x380 [] __get_user_pages+0xe8/0x6f0 [] get_user_pages_unlocked+0x165/0x1e0 [] get_user_pages_fast+0xa1/0x1b0 kernel BUG at arch/x86/mm/gup.c:131! [..] Call Trace: [] gup_pud_range+0x1bc/0x220 [] get_user_pages_fast+0x124/0x1b0 BUG: unable to handle kernel paging request at ffffea0004088000 IP: [] copy_huge_pmd+0x159/0x350 [..] Call Trace: [] copy_page_range+0x34c/0x9f0 [] copy_process+0x1b7f/0x1e10 [] _do_fork+0x91/0x590 All of these paths are interpreting a dax pmd mapping as a transparent huge page and making the assumption that the pfn is covered by the memmap, i.e. that the pfn has an associated struct page. PTE mappings do not suffer the same fate since they have the _PAGE_SPECIAL flag to cause the gup path to fault. We can do something similar for the PMD path, or otherwise defer pmd support for cases where a struct page is available. For now, 4.4-rc and -stable need to disable dax pmd support by default. For development the "depends on BROKEN" line can be removed from CONFIG_FS_DAX_PMD. Cc: Cc: Jan Kara Cc: Dave Chinner Cc: Matthew Wilcox Cc: Kirill A. Shutemov Reported-by: Ross Zwisler Signed-off-by: Dan Williams --- fs/Kconfig | 6 ++++++ fs/dax.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/fs/Kconfig b/fs/Kconfig index da3f32f1a4e4..6ce72d8d1ee1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -46,6 +46,12 @@ config FS_DAX or if unsure, say N. Saying Y will increase the size of the kernel by about 5kB. +config FS_DAX_PMD + bool + default FS_DAX + depends on FS_DAX + depends on BROKEN + endif # BLOCK # Posix ACL utility routines diff --git a/fs/dax.c b/fs/dax.c index d1e5cb7311a1..43671b68220e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, unsigned long pfn; int result = 0; + /* dax pmd mappings are broken wrt gup and fork */ + if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) + return VM_FAULT_FALLBACK; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) return VM_FAULT_FALLBACK; From 24e79d0ddc416184594aa8193023c15d85ab15db Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:07 +0100 Subject: [PATCH 110/291] drm/core: Set legacy_cursor_update in drm_atomic_helper_disable_plane. legacy_cursor_update was being set in restore_fbdev_mode_atomic which was probably unintended. Fix this by only setting it in the function that needs it. This oversight was introduced in commit bbb1e52402b2a288b09ae37e8182599931c7e9df Author: Rob Clark Date: Tue Aug 25 15:35:58 2015 -0400 drm/fb-helper: atomic restore_fbdev_mode()... Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter [Jani: checkpatch fix] Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-2-git-send-email-maarten.lankhorst@ubuntu.com --- drivers/gpu/drm/drm_atomic_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 0c6f62168776..b5efdfde641b 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1553,6 +1553,9 @@ retry: goto fail; } + if (plane_state->crtc && (plane == plane->crtc->cursor)) + plane_state->state->legacy_cursor_update = true; + ret = __drm_atomic_helper_disable_plane(plane, plane_state); if (ret != 0) goto fail; @@ -1605,9 +1608,6 @@ int __drm_atomic_helper_disable_plane(struct drm_plane *plane, plane_state->src_h = 0; plane_state->src_w = 0; - if (plane->crtc && (plane == plane->crtc->cursor)) - plane_state->state->legacy_cursor_update = true; - return 0; } From 4572372847680ee04f184df916d5cf007c94ff7e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:08 +0100 Subject: [PATCH 111/291] drm/core: Fix old_fb handling in drm_mode_atomic_ioctl. plane_mask should be cleared inside the retry loop, because it gets reset on every retry. Without this fix the plane->fb refcounting might get out of sync on retries, resulting in either leaked memory or use-after-free. Signed-off-by: Maarten Lankhorst Cc: stable@vger.kernel.org #v4.3 Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-3-git-send-email-maarten.lankhorst@ubuntu.com --- drivers/gpu/drm/drm_atomic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 7bb3845d9974..0ac31b1ecb67 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1446,7 +1446,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, struct drm_plane *plane; struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; - unsigned plane_mask = 0; + unsigned plane_mask; int ret = 0; unsigned int i, j; @@ -1486,6 +1486,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET); retry: + plane_mask = 0; copied_objs = 0; copied_props = 0; From 0f45c26fc302c02b0576db37d4849baa53a2bb41 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:09 +0100 Subject: [PATCH 112/291] drm/atomic: add a drm_atomic_clean_old_fb helper. This is useful for all the boilerplate code about cleaning old_fb. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-4-git-send-email-maarten.lankhorst@ubuntu.com --- drivers/gpu/drm/drm_atomic.c | 58 +++++++++++++++++++++++++----------- include/drm/drm_atomic.h | 3 ++ 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 0ac31b1ecb67..aeee083c7f95 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1432,6 +1432,45 @@ static int atomic_set_prop(struct drm_atomic_state *state, return ret; } +/** + * drm_atomic_update_old_fb -- Unset old_fb pointers and set plane->fb pointers. + * + * @dev: drm device to check. + * @plane_mask: plane mask for planes that were updated. + * @ret: return value, can be -EDEADLK for a retry. + * + * Before doing an update plane->old_fb is set to plane->fb, + * but before dropping the locks old_fb needs to be set to NULL + * and plane->fb updated. This is a common operation for each + * atomic update, so this call is split off as a helper. + */ +void drm_atomic_clean_old_fb(struct drm_device *dev, + unsigned plane_mask, + int ret) +{ + struct drm_plane *plane; + + /* if succeeded, fixup legacy plane crtc/fb ptrs before dropping + * locks (ie. while it is still safe to deref plane->state). We + * need to do this here because the driver entry points cannot + * distinguish between legacy and atomic ioctls. + */ + drm_for_each_plane_mask(plane, dev, plane_mask) { + if (ret == 0) { + struct drm_framebuffer *new_fb = plane->state->fb; + if (new_fb) + drm_framebuffer_reference(new_fb); + plane->fb = new_fb; + plane->crtc = plane->state->crtc; + + if (plane->old_fb) + drm_framebuffer_unreference(plane->old_fb); + } + plane->old_fb = NULL; + } +} +EXPORT_SYMBOL(drm_atomic_clean_old_fb); + int drm_mode_atomic_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -1577,24 +1616,7 @@ retry: } out: - /* if succeeded, fixup legacy plane crtc/fb ptrs before dropping - * locks (ie. while it is still safe to deref plane->state). We - * need to do this here because the driver entry points cannot - * distinguish between legacy and atomic ioctls. - */ - drm_for_each_plane_mask(plane, dev, plane_mask) { - if (ret == 0) { - struct drm_framebuffer *new_fb = plane->state->fb; - if (new_fb) - drm_framebuffer_reference(new_fb); - plane->fb = new_fb; - plane->crtc = plane->state->crtc; - - if (plane->old_fb) - drm_framebuffer_unreference(plane->old_fb); - } - plane->old_fb = NULL; - } + drm_atomic_clean_old_fb(dev, plane_mask, ret); if (ret && arg->flags & DRM_MODE_PAGE_FLIP_EVENT) { /* diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index e67aeac2aee0..4b74c97d297a 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -136,6 +136,9 @@ drm_atomic_connectors_for_crtc(struct drm_atomic_state *state, void drm_atomic_legacy_backoff(struct drm_atomic_state *state); +void +drm_atomic_clean_old_fb(struct drm_device *dev, unsigned plane_mask, int ret); + int __must_check drm_atomic_check_only(struct drm_atomic_state *state); int __must_check drm_atomic_commit(struct drm_atomic_state *state); int __must_check drm_atomic_async_commit(struct drm_atomic_state *state); From f72c6b33edda439acff81a0da612f2bcd1f46f35 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:10 +0100 Subject: [PATCH 113/291] drm/core: Fix old_fb handling in restore_fbdev_mode_atomic. Don't touch plane->old_fb/fb without having the right locks held. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-5-git-send-email-maarten.lankhorst@ubuntu.com --- drivers/gpu/drm/drm_fb_helper.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index e673c13c7391..abd50863506e 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -342,6 +342,7 @@ static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper) struct drm_plane *plane; struct drm_atomic_state *state; int i, ret; + unsigned plane_mask; state = drm_atomic_state_alloc(dev); if (!state) @@ -349,11 +350,10 @@ static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper) state->acquire_ctx = dev->mode_config.acquire_ctx; retry: + plane_mask = 0; drm_for_each_plane(plane, dev) { struct drm_plane_state *plane_state; - plane->old_fb = plane->fb; - plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); @@ -362,6 +362,9 @@ retry: plane_state->rotation = BIT(DRM_ROTATE_0); + plane->old_fb = plane->fb; + plane_mask |= 1 << drm_plane_index(plane); + /* disable non-primary: */ if (plane->type == DRM_PLANE_TYPE_PRIMARY) continue; @@ -382,19 +385,7 @@ retry: ret = drm_atomic_commit(state); fail: - drm_for_each_plane(plane, dev) { - if (ret == 0) { - struct drm_framebuffer *new_fb = plane->state->fb; - if (new_fb) - drm_framebuffer_reference(new_fb); - plane->fb = new_fb; - plane->crtc = plane->state->crtc; - - if (plane->old_fb) - drm_framebuffer_unreference(plane->old_fb); - } - plane->old_fb = NULL; - } + drm_atomic_clean_old_fb(dev, plane_mask, ret); if (ret == -EDEADLK) goto backoff; From 07d3bad6c1210bd21e85d084807ef4ee4ac43a78 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 11 Nov 2015 11:29:11 +0100 Subject: [PATCH 114/291] drm/core: Fix old_fb handling in pan_display_atomic. Don't touch plane->old_fb/fb without having the right locks held. Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447237751-9663-6-git-send-email-maarten.lankhorst@ubuntu.com --- drivers/gpu/drm/drm_fb_helper.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index abd50863506e..69cbab5e5c81 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1227,7 +1227,9 @@ static int pan_display_atomic(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct drm_device *dev = fb_helper->dev; struct drm_atomic_state *state; + struct drm_plane *plane; int i, ret; + unsigned plane_mask; state = drm_atomic_state_alloc(dev); if (!state) @@ -1235,19 +1237,22 @@ static int pan_display_atomic(struct fb_var_screeninfo *var, state->acquire_ctx = dev->mode_config.acquire_ctx; retry: + plane_mask = 0; for(i = 0; i < fb_helper->crtc_count; i++) { struct drm_mode_set *mode_set; mode_set = &fb_helper->crtc_info[i].mode_set; - mode_set->crtc->primary->old_fb = mode_set->crtc->primary->fb; - mode_set->x = var->xoffset; mode_set->y = var->yoffset; ret = __drm_atomic_helper_set_config(mode_set, state); if (ret != 0) goto fail; + + plane = mode_set->crtc->primary; + plane_mask |= drm_plane_index(plane); + plane->old_fb = plane->fb; } ret = drm_atomic_commit(state); @@ -1259,26 +1264,7 @@ retry: fail: - for(i = 0; i < fb_helper->crtc_count; i++) { - struct drm_mode_set *mode_set; - struct drm_plane *plane; - - mode_set = &fb_helper->crtc_info[i].mode_set; - plane = mode_set->crtc->primary; - - if (ret == 0) { - struct drm_framebuffer *new_fb = plane->state->fb; - - if (new_fb) - drm_framebuffer_reference(new_fb); - plane->fb = new_fb; - plane->crtc = plane->state->crtc; - - if (plane->old_fb) - drm_framebuffer_unreference(plane->old_fb); - } - plane->old_fb = NULL; - } + drm_atomic_clean_old_fb(dev, plane_mask, ret); if (ret == -EDEADLK) goto backoff; From 4fee9f364b9b99f76732f2a6fd6df679a237fa74 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Nov 2015 11:18:14 +0100 Subject: [PATCH 115/291] arm64: mm: use correct mapping granularity under DEBUG_RODATA When booting a 64k pages kernel that is built with CONFIG_DEBUG_RODATA and resides at an offset that is not a multiple of 512 MB, the rounding that occurs in __map_memblock() and fixup_executable() results in incorrect regions being mapped. The following snippet from /sys/kernel/debug/kernel_page_tables shows how, when the kernel is loaded 2 MB above the base of DRAM at 0x40000000, the first 2 MB of memory (which may be inaccessible from non-secure EL1 or just reserved by the firmware) is inadvertently mapped into the end of the module region. ---[ Modules start ]--- 0xfffffdffffe00000-0xfffffe0000000000 2M RW NX ... UXN MEM/NORMAL ---[ Modules end ]--- ---[ Kernel Mapping ]--- 0xfffffe0000000000-0xfffffe0000090000 576K RW NX ... UXN MEM/NORMAL 0xfffffe0000090000-0xfffffe0000200000 1472K ro x ... UXN MEM/NORMAL 0xfffffe0000200000-0xfffffe0000800000 6M ro x ... UXN MEM/NORMAL 0xfffffe0000800000-0xfffffe0000810000 64K ro x ... UXN MEM/NORMAL 0xfffffe0000810000-0xfffffe0000a00000 1984K RW NX ... UXN MEM/NORMAL 0xfffffe0000a00000-0xfffffe00ffe00000 4084M RW NX ... UXN MEM/NORMAL The same issue is likely to occur on 16k pages kernels whose load address is not a multiple of 32 MB (i.e., SECTION_SIZE). So round to SWAPPER_BLOCK_SIZE instead of SECTION_SIZE. Fixes: da141706aea5 ("arm64: add better page protections to arm64") Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Laura Abbott Cc: # 4.0+ Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e3f563c81c48..32ddd893da9a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -362,8 +362,8 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end) * for now. This will get more fine grained later once all memory * is mapped */ - unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); if (end < kernel_x_start) { create_mapping(start, __phys_to_virt(start), @@ -451,18 +451,18 @@ static void __init fixup_executable(void) { #ifdef CONFIG_DEBUG_RODATA /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { unsigned long aligned_start = round_down(__pa(_stext), - SECTION_SIZE); + SWAPPER_BLOCK_SIZE); create_mapping(aligned_start, __phys_to_virt(aligned_start), __pa(_stext) - aligned_start, PAGE_KERNEL); } - if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { unsigned long aligned_end = round_up(__pa(__init_end), - SECTION_SIZE); + SWAPPER_BLOCK_SIZE); create_mapping(__pa(__init_end), (unsigned long)__init_end, aligned_end - __pa(__init_end), PAGE_KERNEL); From 1dccb598df549d892b6450c261da54cdd7af44b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 17:25:48 +0100 Subject: [PATCH 116/291] arm64: simplify dma_get_ops Including linux/acpi.h from asm/dma-mapping.h causes tons of compile-time warnings, e.g. drivers/isdn/mISDN/dsp_ecdis.h:43:0: warning: "FALSE" redefined drivers/isdn/mISDN/dsp_ecdis.h:44:0: warning: "TRUE" redefined drivers/net/fddi/skfp/h/targetos.h:62:0: warning: "TRUE" redefined drivers/net/fddi/skfp/h/targetos.h:63:0: warning: "FALSE" redefined However, it looks like the dependency should not even there as I do not see why __generic_dma_ops() cares about whether we have an ACPI based system or not. The current behavior is to fall back to the global dma_ops when a device has not set its own dma_ops, but only for DT based systems. This seems dangerous, as a random device might have different requirements regarding IOMMU or coherency, so we should really never have that fallback and just forbid DMA when we have not initialized DMA for a device. This removes the global dma_ops variable and the special-casing for ACPI, and just returns the dma ops that got set for the device, or the dummy_dma_ops if none were present. The original code has apparently been copied from arm32 where we rely on it for ISA devices things like the floppy controller, but we should have no such devices on ARM64. Signed-off-by: Arnd Bergmann [catalin.marinas@arm.com: removed acpi_disabled check in arch_setup_dma_ops()] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/dma-mapping.h | 13 +++---------- arch/arm64/mm/dma-mapping.c | 16 ++++------------ 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 54d0ead41afc..61e08f360e31 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -18,7 +18,6 @@ #ifdef __KERNEL__ -#include #include #include @@ -26,22 +25,16 @@ #include #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops *dma_ops; extern struct dma_map_ops dummy_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (unlikely(!dev)) - return dma_ops; - else if (dev->archdata.dma_ops) + if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; - else if (acpi_disabled) - return dma_ops; /* - * When ACPI is enabled, if arch_set_dma_ops is not called, - * we will disable device DMA capability by setting it - * to dummy_dma_ops. + * We expect no ISA devices, and all other DMA masters are expected to + * have someone call arch_setup_dma_ops at device creation time. */ return &dummy_dma_ops; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 97fd714035f9..7963aa4b5d28 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -28,9 +29,6 @@ #include -struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -515,13 +513,7 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - int ret; - - dma_ops = &swiotlb_dma_ops; - - ret = atomic_pool_init(); - - return ret; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -991,8 +983,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, struct iommu_ops *iommu, bool coherent) { - if (!acpi_disabled && !dev->archdata.dma_ops) - dev->archdata.dma_ops = dma_ops; + if (!dev->archdata.dma_ops) + dev->archdata.dma_ops = &swiotlb_dma_ops; dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); From adc235aff6fcf5ae4d8dc00faf30a07632b9725b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 17:32:15 +0100 Subject: [PATCH 117/291] arm64: do not include ptrace.h from compat.h including ptrace.h brings a definition of BITS_PER_PAGE into device drivers and cause a build warning in allmodconfig builds: drivers/block/drbd/drbd_bitmap.c:482:0: warning: "BITS_PER_PAGE" redefined #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) This uses a slightly different way to express current_pt_regs() that avoids the use of the header and gets away with the already included asm/ptrace.h. Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compat.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 7fbed6919b54..eb8432bb82b8 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -23,7 +23,6 @@ */ #include #include -#include #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ @@ -234,7 +233,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) +#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) static inline void __user *arch_compat_alloc_user_space(long len) { From 0eece2b22849c90b730815c893425a36b9d10fd5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 19:13:26 +0000 Subject: [PATCH 118/291] irqchip/gic: Make sure all interrupts are deactivated at boot When booting a GIC/GICv3 based system, we have no idea what state the firmware (or previous kernel in the case of kexec) has left the GIC, and some interrupts may still be active. In order to garantee that we have a clean state, make sure the active bits are cleared at init time. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Cc: Russell King Link: http://lkml.kernel.org/r/1447701208-18150-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-common.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 44a077f3a4a2..f174ce0ca361 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -84,12 +84,15 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs, writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i); /* - * Disable all interrupts. Leave the PPI and SGIs alone - * as they are enabled by redistributor registers. + * Deactivate and disable all SPIs. Leave the PPI and SGIs + * alone as they are in the redistributor registers on GICv3. */ - for (i = 32; i < gic_irqs; i += 32) + for (i = 32; i < gic_irqs; i += 32) { writel_relaxed(GICD_INT_EN_CLR_X32, - base + GIC_DIST_ENABLE_CLEAR + i / 8); + base + GIC_DIST_ACTIVE_CLEAR + i / 8); + writel_relaxed(GICD_INT_EN_CLR_X32, + base + GIC_DIST_ENABLE_CLEAR + i / 8); + } if (sync_access) sync_access(); @@ -102,7 +105,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) /* * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. + * Make sure everything is deactivated. */ + writel_relaxed(GICD_INT_EN_CLR_X32, base + GIC_DIST_ACTIVE_CLEAR); writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR); writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET); From 92eda4ad4371225d6ccf9cded74315547e9a3153 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 19:13:27 +0000 Subject: [PATCH 119/291] irqchip/gic: Clear enable bits before restoring them When restoring the GIC state (after a suspend/resume cycle, for example), the driver directly writes the 'enabled' state it has saved by accessing GICD_ISENABLERn, which performs an OR operation between the value present in the register and the value we write. If whatever code that has run before we reentered the kernel has enabled an interrupt that was previously disabled, we won't restore that disabled state. Making sure we first clear the register (by writting to GICD_ICENABLERn) before restoring the enabled state. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Cc: Russell King Link: http://lkml.kernel.org/r/1447701208-18150-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 515c823c1c95..bc846e7fec44 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -604,9 +604,12 @@ static void gic_dist_restore(unsigned int gic_nr) writel_relaxed(gic_data[gic_nr].saved_spi_target[i], dist_base + GIC_DIST_TARGET + i * 4); - for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) + for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) { + writel_relaxed(GICD_INT_EN_CLR_X32, + dist_base + GIC_DIST_ENABLE_CLEAR + i * 4); writel_relaxed(gic_data[gic_nr].saved_spi_enable[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); + } writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } @@ -654,8 +657,11 @@ static void gic_cpu_restore(unsigned int gic_nr) return; ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_enable); - for (i = 0; i < DIV_ROUND_UP(32, 32); i++) + for (i = 0; i < DIV_ROUND_UP(32, 32); i++) { + writel_relaxed(GICD_INT_EN_CLR_X32, + dist_base + GIC_DIST_ENABLE_CLEAR + i * 4); writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); + } ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); for (i = 0; i < DIV_ROUND_UP(32, 16); i++) From 1c7d4dd46ee048afe19e1294634df6fa66862519 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 19:13:28 +0000 Subject: [PATCH 120/291] irqchip/gic: Add save/restore of the active state When using EOImode==1, we may mark interrupts as being forwarded to a virtual machine. In that case, the interrupt is left active while being passed to the VM. If we suspend the system before the VM has deactivated the interrupt, the active state will be lost (which may be very annoying, as this may result in spurious interrupts and a confused guest). To avoid this, save and restore the active state together with the rest of the GIC registers. Signed-off-by: Marc Zyngier Cc: Cc: Jason Cooper Cc: Russell King Link: http://lkml.kernel.org/r/1447701208-18150-5-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index bc846e7fec44..abf2ffaed392 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -73,9 +73,11 @@ struct gic_chip_data { union gic_base cpu_base; #ifdef CONFIG_CPU_PM u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)]; + u32 saved_spi_active[DIV_ROUND_UP(1020, 32)]; u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)]; u32 saved_spi_target[DIV_ROUND_UP(1020, 4)]; u32 __percpu *saved_ppi_enable; + u32 __percpu *saved_ppi_active; u32 __percpu *saved_ppi_conf; #endif struct irq_domain *domain; @@ -566,6 +568,10 @@ static void gic_dist_save(unsigned int gic_nr) for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) gic_data[gic_nr].saved_spi_enable[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4); + + for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) + gic_data[gic_nr].saved_spi_active[i] = + readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4); } /* @@ -611,6 +617,13 @@ static void gic_dist_restore(unsigned int gic_nr) dist_base + GIC_DIST_ENABLE_SET + i * 4); } + for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) { + writel_relaxed(GICD_INT_EN_CLR_X32, + dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4); + writel_relaxed(gic_data[gic_nr].saved_spi_active[i], + dist_base + GIC_DIST_ACTIVE_SET + i * 4); + } + writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } @@ -634,6 +647,10 @@ static void gic_cpu_save(unsigned int gic_nr) for (i = 0; i < DIV_ROUND_UP(32, 32); i++) ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active); + for (i = 0; i < DIV_ROUND_UP(32, 32); i++) + ptr[i] = readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); for (i = 0; i < DIV_ROUND_UP(32, 16); i++) ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4); @@ -663,6 +680,13 @@ static void gic_cpu_restore(unsigned int gic_nr) writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); } + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active); + for (i = 0; i < DIV_ROUND_UP(32, 32); i++) { + writel_relaxed(GICD_INT_EN_CLR_X32, + dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4); + writel_relaxed(ptr[i], dist_base + GIC_DIST_ACTIVE_SET + i * 4); + } + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); for (i = 0; i < DIV_ROUND_UP(32, 16); i++) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); @@ -716,6 +740,10 @@ static void __init gic_pm_init(struct gic_chip_data *gic) sizeof(u32)); BUG_ON(!gic->saved_ppi_enable); + gic->saved_ppi_active = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4, + sizeof(u32)); + BUG_ON(!gic->saved_ppi_active); + gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4, sizeof(u32)); BUG_ON(!gic->saved_ppi_conf); From ba8af3e592f7175b5f9a92d2cfcc00b82097d1be Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 16 Nov 2015 12:49:14 +0100 Subject: [PATCH 121/291] drm/i915: Clear intel_crtc->atomic before updating it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an atomic update fails intel_crtc->atomic may have have some values left from the last atomic check. One example is atomic->wait_for_vblank, which results in spurious errors in kms_flip. [ 1551.892708] ------------[ cut here ]------------ [ 1551.892721] WARNING: CPU: 3 PID: 4179 at ../drivers/gpu/drm/drm_irq.c:1199 drm_wait_one_vblank+0x197/0x1a0 [drm]() [ 1551.892722] vblank not available on crtc 2, ret=-22 [ 1551.892751] Modules linked in: snd_hda_intel i915 drm_kms_helper drm intel_gtt i2c_algo_bit cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea agpgart cfg80211 binfmt_misc snd_hda_codec_hdmi intel_rapl iosf_mbi x86_pkg_temp_thermal coretemp kvm_intel snd_hda_codec_realtek kvm snd_hda_codec_generic iTCO_wdt aesni_intel aes_x86_64 glue_helper lrw snd_hda_codec gf128mul ablk_helper cryptd snd_hwdep psmouse snd_hda_core pcspkr snd_pcm snd_timer snd lpc_ich i2c_i801 mfd_core soundcore wmi evdev [last unloaded: drm] [ 1551.892753] CPU: 3 PID: 4179 Comm: kms_pipe_crc_ba Tainted: G U W 4.3.0-reg+ #6 [ 1551.892754] Hardware name: /DZ77BH-55K, BIOS BHZ7710H.86A.0100.2013.0517.0942 05/17/2013 [ 1551.892758] ffffffffa03128d8 ffff8800cec73890 ffffffff812c0f3c ffff8800cec738d8 [ 1551.892760] ffff8800cec738c8 ffffffff8104ff36 ffff880116ae2290 0000000000000002 [ 1551.892762] ffff8800d39fcda0 ffff8800d038b4d0 ffff8800d42b5550 ffff8800cec73928 [ 1551.892763] Call Trace: [ 1551.892768] [] dump_stack+0x4e/0x82 [ 1551.892771] [] warn_slowpath_common+0x86/0xc0 [ 1551.892773] [] warn_slowpath_fmt+0x4c/0x50 [ 1551.892781] [] ? drm_vblank_get+0x78/0xd0 [drm] [ 1551.892787] [] drm_wait_one_vblank+0x197/0x1a0 [drm] [ 1551.892813] [] intel_post_plane_update+0xef/0x120 [i915] [ 1551.892832] [] intel_atomic_commit+0x4c2/0x1600 [i915] [ 1551.892862] [] ? drm_atomic_check_only+0x147/0x5e0 [drm] [ 1551.892872] [] ? drm_atomic_add_affected_connectors+0x27/0xf0 [drm] [ 1551.892881] [] drm_atomic_commit+0x37/0x60 [drm] [ 1551.892887] [] restore_fbdev_mode+0x28a/0x2c0 [drm_kms_helper] [ 1551.892895] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x33/0x80 [drm_kms_helper] [ 1551.892900] [] drm_fb_helper_set_par+0x2d/0x50 [drm_kms_helper] [ 1551.892920] [] intel_fbdev_set_par+0x1a/0x60 [i915] [ 1551.892923] [] fb_set_var+0x1a7/0x3f0 [ 1551.892927] [] ? trace_hardirqs_on_caller+0x12f/0x1c0 [ 1551.892931] [] fbcon_blank+0x212/0x2f0 [ 1551.892935] [] do_unblank_screen+0xba/0x1d0 [ 1551.892937] [] vt_ioctl+0x13d5/0x1450 [ 1551.892940] [] ? preempt_count_sub+0x41/0x50 [ 1551.892943] [] tty_ioctl+0x423/0xe30 [ 1551.892947] [] do_vfs_ioctl+0x301/0x560 [ 1551.892949] [] ? putname+0x53/0x60 [ 1551.892952] [] ? __fget_light+0x66/0x90 [ 1551.892955] [] SyS_ioctl+0x79/0x90 [ 1551.892958] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 1551.892961] ---[ end trace 3e764d4b6628c91c ]--- Testcase: kms_flip Reported-and-tested-by: Ville Syrjälä Cc: stable@vger.kernel.org #v4.3 Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Stone Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/5649C2BA.6080300@mblankhorst.nl --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bcf227356f78..442c1d220276 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13034,6 +13034,9 @@ static int intel_atomic_check(struct drm_device *dev, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); + memset(&to_intel_crtc(crtc)->atomic, 0, + sizeof(struct intel_crtc_atomic_commit)); + /* Catch I915_MODE_FLAG_INHERITED */ if (crtc_state->mode.private_flags != crtc->state->mode.private_flags) crtc_state->mode_changed = true; From 83926117273cd6d9ffa1b5ff0cc566ad38648b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 16 Nov 2015 17:02:34 +0200 Subject: [PATCH 122/291] drm: Fix primary plane size for stereo doubled modes for legacy setcrtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly double the hdisplay/vdisplay timings that we use as the primary plane size with stereo doubled modes. Otherwise the modeset gets rejected on machines where the primary plane must be fullscreen, and on the rest only the first eye would get a visible plane. Cc: Daniel Vetter Cc: stable@vger.kernel.org #v3.19+ Fixes: 042652ed9599 ("drm/atomic-helper: implementatations for legacy interfaces") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1447686157-29607-1-git-send-email-ville.syrjala@linux.intel.com Testcase: igt/kms_3d Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_atomic_helper.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index b5efdfde641b..cd398a245d2e 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1741,6 +1741,7 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set, struct drm_crtc_state *crtc_state; struct drm_plane_state *primary_state; struct drm_crtc *crtc = set->crtc; + int hdisplay, vdisplay; int ret; crtc_state = drm_atomic_get_crtc_state(state, crtc); @@ -1783,19 +1784,21 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set, if (ret != 0) return ret; + drm_crtc_get_hv_timing(set->mode, &hdisplay, &vdisplay); + drm_atomic_set_fb_for_plane(primary_state, set->fb); primary_state->crtc_x = 0; primary_state->crtc_y = 0; - primary_state->crtc_h = set->mode->vdisplay; - primary_state->crtc_w = set->mode->hdisplay; + primary_state->crtc_h = vdisplay; + primary_state->crtc_w = hdisplay; primary_state->src_x = set->x << 16; primary_state->src_y = set->y << 16; if (primary_state->rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) { - primary_state->src_h = set->mode->hdisplay << 16; - primary_state->src_w = set->mode->vdisplay << 16; + primary_state->src_h = hdisplay << 16; + primary_state->src_w = vdisplay << 16; } else { - primary_state->src_h = set->mode->vdisplay << 16; - primary_state->src_w = set->mode->hdisplay << 16; + primary_state->src_h = vdisplay << 16; + primary_state->src_w = hdisplay << 16; } commit: From 76dc3769d7c3cdcfa7c4c7768a7cb87cd91af12f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Nov 2015 19:11:28 +0200 Subject: [PATCH 123/291] drm/i915: Don't clobber the addfb2 ioctl params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We try to convert the old way of of specifying fb tiling (obj->tiling) into the new fb modifiers. We store the result in the passed in mode_cmd structure. But that structure comes directly from the addfb2 ioctl, and gets copied back out to userspace, which means we're clobbering the modifiers that the user provided (all 0 since the DRM_MODE_FB_MODIFIERS flag wasn't even set by the user). Hence if the user reuses the struct for another addfb2, the ioctl will be rejected since it's now asking for some modifiers w/o the flag set. Fix the problem by making a copy of the user provided structure. We can play any games we want with the copy. IGT-Version: 1.12-git (x86_64) (Linux: 4.4.0-rc1-stereo+ x86_64) ... Subtest basic-X-tiled: SUCCESS (0.001s) Test assertion failure function pitch_tests, file kms_addfb_basic.c:167: Failed assertion: drmIoctl(fd, DRM_IOCTL_MODE_ADDFB2, &f) == 0 Last errno: 22, Invalid argument Stack trace: #0 [__igt_fail_assert+0x101] #1 [pitch_tests+0x619] #2 [__real_main426+0x2f] #3 [main+0x23] #4 [__libc_start_main+0xf0] #5 [_start+0x29] #6 [+0x29] Subtest framebuffer-vs-set-tiling failed. **** DEBUG **** Test assertion failure function pitch_tests, file kms_addfb_basic.c:167: Failed assertion: drmIoctl(fd, DRM_IOCTL_MODE_ADDFB2, &f) == 0 Last errno: 22, Invalid argument **** END **** Subtest framebuffer-vs-set-tiling: FAIL (0.003s) ... IGT-Version: 1.12-git (x86_64) (Linux: 4.4.0-rc1-stereo+ x86_64) Subtest framebuffer-vs-set-tiling: SUCCESS (0.000s) Cc: stable@vger.kernel.org # v4.1+ Cc: Daniel Vetter Cc: Tvrtko Ursulin Fixes: 2a80eada326f ("drm/i915: Add fb format modifier support") Testcase: igt/kms_addfb_basic/clobbered-modifier Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447261890-3960-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 442c1d220276..8f80bbeb1313 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14369,16 +14369,17 @@ static int intel_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, - struct drm_mode_fb_cmd2 *mode_cmd) + struct drm_mode_fb_cmd2 *user_mode_cmd) { struct drm_i915_gem_object *obj; + struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd; obj = to_intel_bo(drm_gem_object_lookup(dev, filp, - mode_cmd->handles[0])); + mode_cmd.handles[0])); if (&obj->base == NULL) return ERR_PTR(-ENOENT); - return intel_framebuffer_create(dev, mode_cmd, obj); + return intel_framebuffer_create(dev, &mode_cmd, obj); } #ifndef CONFIG_DRM_FBDEV_EMULATION From de818bd4522c40ea02a81b387d2fa86f989c9623 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 17 Nov 2015 11:50:51 +0000 Subject: [PATCH 124/291] arm64: kernel: pause/unpause function graph tracer in cpu_suspend() The function graph tracer adds instrumentation that is required to trace both entry and exit of a function. In particular the function graph tracer updates the "return address" of a function in order to insert a trace callback on function exit. Kernel power management functions like cpu_suspend() are called upon power down entry with functions called "finishers" that are in turn called to trigger the power down sequence but they may not return to the kernel through the normal return path. When the core resumes from low-power it returns to the cpu_suspend() function through the cpu_resume path, which leaves the trace stack frame set-up by the function tracer in an incosistent state upon return to the kernel when tracing is enabled. This patch fixes the issue by pausing/resuming the function graph tracer on the thread executing cpu_suspend() (ie the function call that subsequently triggers the "suspend finishers"), so that the function graph tracer state is kept consistent across functions that enter power down states and never return by effectively disabling graph tracer while they are executing. Fixes: 819e50e25d0c ("arm64: Add ftrace support") Signed-off-by: Lorenzo Pieralisi Reported-by: Catalin Marinas Reported-by: AKASHI Takahiro Suggested-by: Steven Rostedt Acked-by: Steven Rostedt Cc: Will Deacon Cc: # 3.16+ Signed-off-by: Catalin Marinas --- arch/arm64/kernel/suspend.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index fce95e17cf7f..1095aa483a1c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -70,6 +71,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ local_dbg_save(flags); + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka suspend finishers) hence + * disable graph tracing during their execution. + */ + pause_graph_tracing(); + /* * mm context saved on the stack, it will be restored when * the cpu comes out of reset through the identity mapped @@ -111,6 +119,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) hw_breakpoint_restore(NULL); } + unpause_graph_tracing(); + /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully From f16593034a30bcbef8f63c37d0f2b9e1a0902c2d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 20 Oct 2015 16:33:53 -0700 Subject: [PATCH 125/291] usb: dwc2: host: Fix ahbcfg for rk3066 The comment for ahbcfg for rk3066 parameters (also used for rk3288) claimed that ahbcfg was INCR16, but it wasn't. Since the bits weren't shifted properly, the 0x7 ended up being masked and we ended up programming 0x3 for the HBstLen. Let's set it to INCR16 properly. As per Wu Liang Feng at Rockchip this may increase transmission efficiency. I did blackbox tests with writing 0s to a USB-based SD reader (forcefully capping CPU Freq to try to measure efficiency): cd /sys/devices/system/cpu/cpu0/cpufreq echo userspace > scaling_governor echo 126000 > scaling_setspeed for i in $(seq 10); do dd if=/dev/zero of=/dev/sdb bs=1M count=750 done With the above tests I found that speeds went from ~15MB/s to ~18MB/s. Note that most other tests I did (including reading from the same USB reader) didn't show any difference in performance. Tested-by: Heiko Stuebner Acked-by: John Youn Reviewed-by: Liangfeng Wu Signed-off-by: Douglas Anderson Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/platform.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 5859b0fa19ee..e61d773cf65e 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -108,7 +108,8 @@ static const struct dwc2_core_params params_rk3066 = { .host_ls_low_power_phy_clk = -1, .ts_dline = -1, .reload_ctl = -1, - .ahbcfg = 0x7, /* INCR16 */ + .ahbcfg = GAHBCFG_HBSTLEN_INCR16 << + GAHBCFG_HBSTLEN_SHIFT, .uframe_sched = -1, .external_id_pin_ctl = -1, .hibernation = -1, From 1fb7f12d5bec1b0a699c85d724bd5184a01fa2dd Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 22 Oct 2015 13:05:03 -0700 Subject: [PATCH 126/291] usb: dwc2: host: Fix remote wakeup when not in DWC2_L2 In commit 734643dfbdde ("usb: dwc2: host: add flag to reflect bus state") we changed dwc2_port_suspend() not to set the lx_state anymore (instead it sets the new bus_suspended variable). This introduced a bug where we would fail to detect device insertions if: 1. Plug empty hub into dwc2 2. Plug USB flash drive into the empty hub. 3. Wait a few seconds 4. Unplug USB flash drive 5. Less than 2 seconds after step 4, plug the USB flash drive in again. The dwc2_hcd_rem_wakeup() function should have been changed to look at the new bus_suspended variable. Let's fix it. Since commit b46146d59fda ("usb: dwc2: host: resume root hub on remote wakeup") talks about needing the root hub resumed if the bus was suspended, we'll include it in our test. It appears that the "port_l1_change" should only be set to 1 if we were in DWC2_L1 (the driver currently never sets this), so we'll update the former "else" case based on this test. Fixes: 734643dfbdde ("usb: dwc2: host: add flag to reflect bus state") Acked-by: John Youn Tested-by: Gregory Herrero Reviewed-by: Heiko Stuebner Signed-off-by: Douglas Anderson Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/hcd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index e79baf73c234..571c21727ff9 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -324,12 +324,13 @@ void dwc2_hcd_disconnect(struct dwc2_hsotg *hsotg) */ static void dwc2_hcd_rem_wakeup(struct dwc2_hsotg *hsotg) { - if (hsotg->lx_state == DWC2_L2) { + if (hsotg->bus_suspended) { hsotg->flags.b.port_suspend_change = 1; usb_hcd_resume_root_hub(hsotg->priv); - } else { - hsotg->flags.b.port_l1_change = 1; } + + if (hsotg->lx_state == DWC2_L1) + hsotg->flags.b.port_l1_change = 1; } /** @@ -1428,8 +1429,8 @@ static void dwc2_wakeup_detected(unsigned long data) dev_dbg(hsotg->dev, "Clear Resume: HPRT0=%0x\n", dwc2_readl(hsotg->regs + HPRT0)); - hsotg->bus_suspended = 0; dwc2_hcd_rem_wakeup(hsotg); + hsotg->bus_suspended = 0; /* Change to L0 state */ hsotg->lx_state = DWC2_L0; From 5e216d54b4862611c663e1e3e8ca1db1930ef948 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 3 Nov 2015 15:57:38 +0800 Subject: [PATCH 127/291] usb: gadget: f_loopback: fix the warning during the enumeration The current code tries to allocate memory with GFP_KERNEL at interrupt context, it would show below warning during the enumeration when I test it with chipidea hardware, change GFP flag as GFP_ATOMIC can fix this issue. [ 40.438237] zero gadget: high-speed config #2: loopback [ 40.444924] ------------[ cut here ]------------ [ 40.449609] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:2755 lockdep_trace_alloc+0x108/0x128() [ 40.461715] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) [ 40.467130] Modules linked in: [ 40.470216] usb_f_ss_lb g_zero libcomposite evbug [ 40.473822] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.3.0-rc5-00168-gb730aaf #604 [ 40.481496] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 40.487345] Backtrace: [ 40.489857] [<80014e94>] (dump_backtrace) from [<80015088>] (show_stack+0x18/0x1c) [ 40.497445] r6:80b67a80 r5:00000000 r4:00000000 r3:00000000 [ 40.503234] [<80015070>] (show_stack) from [<802e27b4>] (dump_stack+0x8c/0xa4) [ 40.510503] [<802e2728>] (dump_stack) from [<8002cfe8>] (warn_slowpath_common+0x80/0xbc) [ 40.518612] r6:8007510c r5:00000009 r4:80b49c88 r3:00000001 [ 40.524396] [<8002cf68>] (warn_slowpath_common) from [<8002d05c>] (warn_slowpath_fmt+0x38/0x40) [ 40.533109] r8:bcfdef80 r7:bdb705cc r6:000080d0 r5:be001e80 r4:809cc278 [ 40.539965] [<8002d028>] (warn_slowpath_fmt) from [<8007510c>] (lockdep_trace_alloc+0x108/0x128) [ 40.548766] r3:809d0128 r2:809cc278 [ 40.552401] r4:600b0193 [ 40.554990] [<80075004>] (lockdep_trace_alloc) from [<801093d4>] (kmem_cache_alloc+0x28/0x15c) [ 40.563618] r4:000080d0 r3:80b4aa8c [ 40.567270] [<801093ac>] (kmem_cache_alloc) from [<804d95e4>] (ep_alloc_request+0x58/0x68) [ 40.575550] r10:7f01f104 r9:00000001 r8:bcfdef80 r7:bdb705cc r6:bc178700 r5:00000000 [ 40.583512] r4:bcfdef80 r3:813c0a38 [ 40.587183] [<804d958c>] (ep_alloc_request) from [<7f01f7ec>] (loopback_set_alt+0x114/0x21c [usb_f_ss_lb]) [ 40.596929] [<7f01f6d8>] (loopback_set_alt [usb_f_ss_lb]) from [<7f006910>] (composite_setup+0xbd0/0x17e8 [libcomposite]) [ 40.607902] r10:bd3a2c0c r9:00000000 r8:bcfdef80 r7:bc178700 r6:bdb702d0 r5:bcfdefdc [ 40.615866] r4:7f0199b4 r3:00000002 [ 40.619542] [<7f005d40>] (composite_setup [libcomposite]) from [<804dae88>] (udc_irq+0x784/0xd1c) [ 40.628431] r10:80bb5619 r9:c0876140 r8:00012001 r7:bdb71010 r6:bdb70568 r5:00010001 [ 40.636392] r4:bdb70014 [ 40.638985] [<804da704>] (udc_irq) from [<804d64f8>] (ci_irq+0x5c/0x118) [ 40.645702] r10:80bb5619 r9:be11e000 r8:00000117 r7:00000000 r6:bdb71010 r5:be11e060 [ 40.653666] r4:bdb70010 [ 40.656261] [<804d649c>] (ci_irq) from [<8007f638>] (handle_irq_event_percpu+0x7c/0x13c) [ 40.664367] r6:00000000 r5:be11e060 r4:bdb05cc0 r3:804d649c [ 40.670149] [<8007f5bc>] (handle_irq_event_percpu) from [<8007f740>] (handle_irq_event+0x48/0x6c) [ 40.679036] r10:00000000 r9:be008000 r8:00000001 r7:00000000 r6:bdb05cc0 r5:be11e060 [ 40.686998] r4:be11e000 [ 40.689581] [<8007f6f8>] (handle_irq_event) from [<80082850>] (handle_fasteoi_irq+0xd4/0x1b0) [ 40.698120] r6:80b56a30 r5:be11e060 r4:be11e000 r3:00000000 [ 40.703898] [<8008277c>] (handle_fasteoi_irq) from [<8007ec04>] (generic_handle_irq+0x28/0x3c) [ 40.712524] r7:00000000 r6:80b4aaf4 r5:00000117 r4:80b445fc [ 40.718304] [<8007ebdc>] (generic_handle_irq) from [<8007ef20>] (__handle_domain_irq+0x6c/0xe8) [ 40.727033] [<8007eeb4>] (__handle_domain_irq) from [<800095d4>] (gic_handle_irq+0x48/0x94) [ 40.735402] r9:c080f100 r8:80b4ac6c r7:c080e100 r6:80b67d40 r5:80b49f00 r4:c080e10c [ 40.743290] [<8000958c>] (gic_handle_irq) from [<80015d38>] (__irq_svc+0x58/0x78) [ 40.750791] Exception stack(0x80b49f00 to 0x80b49f48) [ 40.755873] 9f00: 00000001 00000001 00000000 80024320 80b48000 80b4a9d0 80b4a984 80b433e4 [ 40.764078] 9f20: 00000001 807f4680 00000000 80b49f5c 80b49f20 80b49f50 80071ca4 800113fc [ 40.772272] 9f40: 200b0013 ffffffff [ 40.775776] r9:807f4680 r8:00000001 r7:80b49f34 r6:ffffffff r5:200b0013 r4:800113fc [ 40.783677] [<800113d4>] (arch_cpu_idle) from [<8006c5bc>] (default_idle_call+0x28/0x38) [ 40.791798] [<8006c594>] (default_idle_call) from [<8006c6dc>] (cpu_startup_entry+0x110/0x1b0) [ 40.800445] [<8006c5cc>] (cpu_startup_entry) from [<807e95dc>] (rest_init+0x12c/0x168) [ 40.808376] r7:80b4a8c0 r3:807f4b7c [ 40.812030] [<807e94b0>] (rest_init) from [<80ad7cc0>] (start_kernel+0x360/0x3d4) [ 40.819528] r5:80bcb000 r4:80bcb050 [ 40.823171] [<80ad7960>] (start_kernel) from [<8000807c>] (0x8000807c) It fixes commit 91c42b0da8e3 ("usb: gadget: loopback: Fix looping back logic implementation"). Cc: # v3.18+ Signed-off-by: Peter Chen Reviewed-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_loopback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c index 23933bdf2d9d..ddc3aad886b7 100644 --- a/drivers/usb/gadget/function/f_loopback.c +++ b/drivers/usb/gadget/function/f_loopback.c @@ -329,7 +329,7 @@ static int alloc_requests(struct usb_composite_dev *cdev, for (i = 0; i < loop->qlen && result == 0; i++) { result = -ENOMEM; - in_req = usb_ep_alloc_request(loop->in_ep, GFP_KERNEL); + in_req = usb_ep_alloc_request(loop->in_ep, GFP_ATOMIC); if (!in_req) goto fail; From 68fe05e2a45181ce02ab2698930b37f20487bd24 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 6 Nov 2015 12:08:56 -0600 Subject: [PATCH 128/291] usb: musb: fix tx fifo flush handling Here are a few changes in musb_h_tx_flush_fifo(). - It has been observed that sometimes (if not always) musb is unable to flush tx fifo during urb dequeue when disconnect a device. But it seems to be harmless, since the tx fifo flush is done again in musb_ep_program() when re-use the hw_ep. But the WARN() floods the console in the case when multiple tx urbs are queued, so change it to dev_WARN_ONCE(). - applications could queue up many tx urbs, then the 1ms delay could causes minutes of delay in device disconnect. So remove it to get better user experience. The 1ms delay does not help the flushing anyway. - cleanup the debug code - related to lastcsr. ---- Note: The tx fifo flush issue has been observed during device disconnect on AM335x. To reproduce the issue, ensure tx urb(s) are queued when unplug the usb device which is connected to AM335x usb host port. I found using a usb-ethernet device and running iperf (client on AM335x) has very high chance to trigger the problem. Better to turn on dev_dbg() in musb_cleanup_urb() with CPPI enabled to see the issue when aborting the tx channel. Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_host.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 26c65e66cc0f..795a45b1b25b 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -112,22 +112,32 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep) struct musb *musb = ep->musb; void __iomem *epio = ep->regs; u16 csr; - u16 lastcsr = 0; int retries = 1000; csr = musb_readw(epio, MUSB_TXCSR); while (csr & MUSB_TXCSR_FIFONOTEMPTY) { - if (csr != lastcsr) - dev_dbg(musb->controller, "Host TX FIFONOTEMPTY csr: %02x\n", csr); - lastcsr = csr; csr |= MUSB_TXCSR_FLUSHFIFO | MUSB_TXCSR_TXPKTRDY; musb_writew(epio, MUSB_TXCSR, csr); csr = musb_readw(epio, MUSB_TXCSR); - if (WARN(retries-- < 1, + + /* + * FIXME: sometimes the tx fifo flush failed, it has been + * observed during device disconnect on AM335x. + * + * To reproduce the issue, ensure tx urb(s) are queued when + * unplug the usb device which is connected to AM335x usb + * host port. + * + * I found using a usb-ethernet device and running iperf + * (client on AM335x) has very high chance to trigger it. + * + * Better to turn on dev_dbg() in musb_cleanup_urb() with + * CPPI enabled to see the issue when aborting the tx channel. + */ + if (dev_WARN_ONCE(musb->controller, retries-- < 1, "Could not flush host TX%d fifo: csr: %04x\n", ep->epnum, csr)) return; - mdelay(1); } } From b9e51b2b1fda19143f48d182ed7a2943f21e1ae4 Mon Sep 17 00:00:00 2001 From: Ben McCauley Date: Mon, 16 Nov 2015 10:47:24 -0600 Subject: [PATCH 129/291] usb: dwc3: gadget: let us set lower max_speed In some SoCs, dwc3 is implemented as a USB2.0 only core, meaning that it can't ever achieve SuperSpeed. Currect driver always sets gadget.max_speed to USB_SPEED_SUPER unconditionally. This can causes issues to some Host stacks where the host will issue a GetBOS() request and we will reply with a BOS containing Superspeed Capability Descriptor. At least Windows seems to be upset by this fact and prints a warning that we should connect $this device to another port. [ balbi@ti.com : rewrote entire commit, including source code comment to make a lot clearer what the problem is ] Cc: Signed-off-by: Ben McCauley Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 55ba447fdf8b..e24a01cc98df 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2744,11 +2744,33 @@ int dwc3_gadget_init(struct dwc3 *dwc) } dwc->gadget.ops = &dwc3_gadget_ops; - dwc->gadget.max_speed = USB_SPEED_SUPER; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported = true; dwc->gadget.name = "dwc3-gadget"; + /* + * FIXME We might be setting max_speed to revision < DWC3_REVISION_220A) + dwc3_trace(trace_dwc3_gadget, + "Changing max_speed on rev %08x\n", + dwc->revision); + + dwc->gadget.max_speed = dwc->maximum_speed; + /* * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize * on ep out. From 89d99aea94a5bf8eae530cd77df40904d382946c Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 12 Nov 2015 08:45:52 +0100 Subject: [PATCH 130/291] usb: phy: phy-mxs-usb: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later. Signed-off-by: LABBE Corentin Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-mxs-usb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index 4d863ebc117c..b7536af777ab 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -452,10 +452,13 @@ static int mxs_phy_probe(struct platform_device *pdev) struct clk *clk; struct mxs_phy *mxs_phy; int ret; - const struct of_device_id *of_id = - of_match_device(mxs_phy_dt_ids, &pdev->dev); + const struct of_device_id *of_id; struct device_node *np = pdev->dev.of_node; + of_id = of_match_device(mxs_phy_dt_ids, &pdev->dev); + if (!of_id) + return -ENODEV; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(base)) From 51676c8d6daac6dd11f768cb26db8d5b572f65b4 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 13 Nov 2015 15:45:24 -0600 Subject: [PATCH 131/291] usb: musb: enable usb_dma parameter Change the permission of usb_dma parameter so it can be used for runtime debug without reboot. Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 3de9087ecad4..18cfc0a361cb 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1668,7 +1668,7 @@ EXPORT_SYMBOL_GPL(musb_interrupt); static bool use_dma = 1; /* "modprobe ... use_dma=0" etc */ -module_param(use_dma, bool, 0); +module_param(use_dma, bool, 0644); MODULE_PARM_DESC(use_dma, "enable/disable use of DMA"); void musb_dma_completion(struct musb *musb, u8 epnum, u8 transmit) From d134c48d889ddceadf4c990e6f3df16b816ed5d4 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 16 Nov 2015 19:22:08 +0100 Subject: [PATCH 132/291] usb: gadget: atmel_usba_udc: Expose correct device speed Following changes that appeared in lk 4.0.0, the gadget udc driver for some ARM based Atmel SoCs (e.g. at91sam9x5 and sama5d3 families) incorrectly deduced full-speed USB link speed even when the hardware had negotiated a high-speed link. The fix is to make sure that the UDPHS Interrupt Enable Register value does not mask the SPEED bit in the Interrupt Status Register. For a mass storage gadget this problem lead to failures when the host had a USB 3 port with the xhci_hcd driver. If the host was a USB 2 port using the ehci_hcd driver then the mass storage gadget worked (but probably at a lower speed than it should have). Signed-off-by: Douglas Gilbert Reviewed-by: Boris Brezillon Cc: #4.0+ Fixes: 9870d895ad87 ("usb: atmel_usba_udc: Mask status with enabled irqs") Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index f0f2b066ac08..f92f5aff0dd5 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1633,7 +1633,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) spin_lock(&udc->lock); int_enb = usba_int_enb_get(udc); - status = usba_readl(udc, INT_STA) & int_enb; + status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED); DBG(DBG_INT, "irq, status=%#08x\n", status); if (status & USBA_DET_SUSPEND) { From 5bbbfdf685657771fda05b926b28ca0f79163a28 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 17 Nov 2015 09:39:26 +0000 Subject: [PATCH 133/291] dm: fix ioctl retry termination with signal dm-mpath retries ioctl, when no path is readily available and the device is configured to queue I/O in such a case. If you want to stop the retry before multipathd decides to turn off queueing mode, you could send signal for the process to exit from the loop. However the check of fatal signal has not carried along when commit 6c182cd88d17 ("dm mpath: fix ioctl deadlock when no paths") moved the loop from dm-mpath to dm core. As a result, we can't terminate such a process in the retry loop. Easy reproducer of the situation is: # dmsetup create mp --table '0 1024 multipath 0 0 0 0' # dmsetup message mp 0 'queue_if_no_path' # sg_inq /dev/mapper/mp then you should be able to terminate sg_inq by pressing Ctrl+C. Fixes: 6c182cd88d17 ("dm mpath: fix ioctl deadlock when no paths") Signed-off-by: Jun'ichi Nomura Cc: Hannes Reinecke Cc: Mikulas Patocka Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org --- drivers/md/dm-mpath.c | 2 +- drivers/md/dm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index aaa6caa46a9f..3d1829237678 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1562,7 +1562,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, spin_unlock_irqrestore(&m->lock, flags); - if (r == -ENOTCONN && !fatal_signal_pending(current)) { + if (r == -ENOTCONN) { spin_lock_irqsave(&m->lock, flags); if (!m->current_pg) { /* Path status changed, redo selection */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6e15f3565892..fabd5d8fd559 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -591,7 +591,7 @@ retry: out: dm_put_live_table(md, *srcu_idx); - if (r == -ENOTCONN) { + if (r == -ENOTCONN && !fatal_signal_pending(current)) { msleep(10); goto retry; } From 647a20d5cad7477033bc021ec9dd75edf4bbf9a0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 17 Nov 2015 14:11:34 -0500 Subject: [PATCH 134/291] dm: do not reuse dm_blk_ioctl block_device input as local variable (Ab)using the @bdev passed to dm_blk_ioctl() opens the potential for targets' .prepare_ioctl to fail if they go on to check the bdev for !NULL. Fixes: e56f81e0b01e ("dm: refactor ioctl handling") Reported-by: Junichi Nomura Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fabd5d8fd559..5df40480228b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -603,9 +603,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, { struct mapped_device *md = bdev->bd_disk->private_data; struct dm_target *tgt; + struct block_device *tgt_bdev = NULL; int srcu_idx, r; - r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); + r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx); if (r < 0) return r; @@ -620,7 +621,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, goto out; } - r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + r = __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg); out: dm_put_live_table(md, srcu_idx); return r; From 43e43c9ea60a7a1831ec823773e924d2dadefd44 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Tue, 17 Nov 2015 09:36:56 +0000 Subject: [PATCH 135/291] dm mpath: fix infinite recursion in ioctl when no paths and !queue_if_no_path In multipath_prepare_ioctl(), - pgpath is a path selected from available paths - m->queue_io is true if we cannot send a request immediately to paths, either because: * there is no available path * the path group needs activation (pg_init) - pg_init is not started - pg_init is still running - m->queue_if_no_path is true if the device is configured to queue I/O if there are no available paths If !pgpath && !m->queue_if_no_path, the handler should return -EIO. However in the course of refactoring the condition check has broken and returns success in that case. Since bdev points to the dm device itself, dm_blk_ioctl() calls __blk_dev_driver_ioctl() for itself and recurses until crash. You could reproduce the problem like this: # dmsetup create mp --table '0 1024 multipath 0 0 0 0' # sg_inq /dev/mapper/mp [ 172.648615] BUG: unable to handle kernel paging request at fffffffc81b10268 [ 172.662843] PGD 19dd067 PUD 0 [ 172.666269] Thread overran stack, or stack corrupted [ 172.671808] Oops: 0000 [#1] SMP ... Fix the condition check with some clarifications. Fixes: e56f81e0b01e ("dm: refactor ioctl handling") Signed-off-by: Jun'ichi Nomura Cc: Christoph Hellwig Cc: Mike Snitzer Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3d1829237678..cfa29f574c2a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1537,29 +1537,31 @@ static int multipath_prepare_ioctl(struct dm_target *ti, struct block_device **bdev, fmode_t *mode) { struct multipath *m = ti->private; - struct pgpath *pgpath; unsigned long flags; int r; - r = 0; - spin_lock_irqsave(&m->lock, flags); if (!m->current_pgpath) __choose_pgpath(m, 0); - pgpath = m->current_pgpath; - - if (pgpath) { - *bdev = pgpath->path.dev->bdev; - *mode = pgpath->path.dev->mode; + if (m->current_pgpath) { + if (!m->queue_io) { + *bdev = m->current_pgpath->path.dev->bdev; + *mode = m->current_pgpath->path.dev->mode; + r = 0; + } else { + /* pg_init has not started or completed */ + r = -ENOTCONN; + } + } else { + /* No path is available */ + if (m->queue_if_no_path) + r = -ENOTCONN; + else + r = -EIO; } - if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) - r = -ENOTCONN; - else if (!*bdev) - r = -EIO; - spin_unlock_irqrestore(&m->lock, flags); if (r == -ENOTCONN) { From 91276ae2ea7d588cb70ed54fb0b00b732aa7ea04 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Thu, 22 Oct 2015 11:12:26 +0800 Subject: [PATCH 136/291] drm/vc4: vc4_plane_duplicate_state() can be static Signed-off-by: Fengguang Wu Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index cdd8b10c0147..2db509210d62 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -70,7 +70,7 @@ static bool plane_enabled(struct drm_plane_state *state) return state->fb && state->crtc; } -struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) +static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) { struct vc4_plane_state *vc4_state; @@ -97,8 +97,8 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) return &vc4_state->base; } -void vc4_plane_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) +static void vc4_plane_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); @@ -108,7 +108,7 @@ void vc4_plane_destroy_state(struct drm_plane *plane, } /* Called during init to allocate the plane's atomic state. */ -void vc4_plane_reset(struct drm_plane *plane) +static void vc4_plane_reset(struct drm_plane *plane) { struct vc4_plane_state *vc4_state; From e2ab3218f46bce3380189ba4ac2a8cbdb2ae6272 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 23 Oct 2015 07:39:45 +0200 Subject: [PATCH 137/291] drm/vc4: fix platform_no_drv_owner.cocci warnings drivers/gpu/drm/vc4/vc4_drv.c:248:3-8: No need to set .owner here. The core will do it. Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Fengguang Wu Signed-off-by: Julia Lawall Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 6e730605edcc..d5db9e0f3b73 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -259,7 +259,6 @@ static struct platform_driver vc4_platform_driver = { .remove = vc4_platform_drm_remove, .driver = { .name = "vc4-drm", - .owner = THIS_MODULE, .of_match_table = vc4_of_match, }, }; From 2fa8e90433cdc2d9f09883f45fd2942dc042d6a6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 23 Oct 2015 07:38:00 +0200 Subject: [PATCH 138/291] drm/vc4: fix itnull.cocci warnings Connector cannot be null because it is a list entry, ie accessed at an offset from the positions of the list structure pointers themselves. Generated by: scripts/coccinelle/iterators/itnull.cocci Signed-off-by: Fengguang Wu Signed-off-by: Julia Lawall Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 7a9f4768591e..f794c5b6996b 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -168,7 +168,7 @@ static int vc4_get_clock_select(struct drm_crtc *crtc) struct drm_connector *connector; drm_for_each_connector(connector, crtc->dev) { - if (connector && connector->state->crtc == crtc) { + if (connector->state->crtc == crtc) { struct drm_encoder *encoder = connector->encoder; struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); From 79513237da7ecc0981e7d962a784625b32cabfdc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Nov 2015 16:21:40 +0300 Subject: [PATCH 139/291] drm/vc4: checking for NULL instead of IS_ERR vc4_plane_init() returns an ERR_PTR on error, it doesn't return NULL. This was obviously intended because the next lines call PTR_ERR(primary_plane) already. Fixes: c8b75bca92cb ('Eric Anholt ') Signed-off-by: Dan Carpenter Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index f794c5b6996b..f77fa9ebab29 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -591,14 +591,14 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) * that will take too much. */ primary_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_PRIMARY); - if (!primary_plane) { + if (IS_ERR(primary_plane)) { dev_err(dev, "failed to construct primary plane\n"); ret = PTR_ERR(primary_plane); goto err; } cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); - if (!cursor_plane) { + if (IS_ERR(cursor_plane)) { dev_err(dev, "failed to construct cursor plane\n"); ret = PTR_ERR(cursor_plane); goto err_primary; From 36f4f69a092d766581f2442ddffea051e1ef464e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 Oct 2015 10:24:11 +0100 Subject: [PATCH 140/291] drm/vc4: Fix some failure to track __iomem decorations on pointers. Caught by the kbuild test robot. v2: Fix the +i I dropped in the first version. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 3 ++- drivers/gpu/drm/vc4/vc4_hvs.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index f77fa9ebab29..265064c62d49 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -401,7 +401,8 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, dlist_next++; HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - (u32 *)vc4_crtc->dlist - (u32 *)vc4->hvs->dlist); + (u32 __iomem *)vc4_crtc->dlist - + (u32 __iomem *)vc4->hvs->dlist); /* Make the next display list start after ours. */ vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist); diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index ab1673f672a4..8098c5b21ba4 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -75,10 +75,10 @@ void vc4_hvs_dump_state(struct drm_device *dev) for (i = 0; i < 64; i += 4) { DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n", i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D", - ((uint32_t *)vc4->hvs->dlist)[i + 0], - ((uint32_t *)vc4->hvs->dlist)[i + 1], - ((uint32_t *)vc4->hvs->dlist)[i + 2], - ((uint32_t *)vc4->hvs->dlist)[i + 3]); + readl((u32 __iomem *)vc4->hvs->dlist + i + 0), + readl((u32 __iomem *)vc4->hvs->dlist + i + 1), + readl((u32 __iomem *)vc4->hvs->dlist + i + 2), + readl((u32 __iomem *)vc4->hvs->dlist + i + 3)); } } From bf893acc1675516fabf238a5ee3b52859c5687dc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 Oct 2015 10:36:27 +0100 Subject: [PATCH 141/291] drm/vc4: Make sure that planes aren't scaled. We would scan out the memory around them if an upscale was attempted, and would just scan out incorrectly for downscaling. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 2db509210d62..887f3caad0be 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -157,6 +157,16 @@ static int vc4_plane_mode_set(struct drm_plane *plane, int crtc_w = state->crtc_w; int crtc_h = state->crtc_h; + if (state->crtc_w << 16 != state->src_w || + state->crtc_h << 16 != state->src_h) { + /* We don't support scaling yet, which involves + * allocating the LBM memory for scaling temporary + * storage, and putting filter kernels in the HVS + * context. + */ + return -EINVAL; + } + if (crtc_x < 0) { offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x; crtc_w += crtc_x; From 1ef0623371e0a39a476fb05e575089cf48178f5c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Nov 2015 13:29:49 -0200 Subject: [PATCH 142/291] kernel-doc: Make it compatible with Perl versions below 5.12 again Changeset 4d73270192ec('scripts/kernel-doc: Replacing highlights hash by an array') broke compatibility of the kernel-doc script with older versions of perl by using "keys ARRAY" syntax with is available only on Perl 5.12 or newer, according with: http://perldoc.perl.org/functions/keys.html Restore backward compatibility by replacing "foreach my $k (keys ARRAY)" by a C-like variant: "for (my $k = 0; $k < !ARRAY; $k++)" Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 125b906cd1d4..638a38e1b419 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -2711,7 +2711,7 @@ $kernelversion = get_kernel_version(); # generate a sequence of code that will splice in highlighting information # using the s// operator. -foreach my $k (keys @highlights) { +for (my $k = 0; $k < @highlights; $k++) { my $pattern = $highlights[$k][0]; my $result = $highlights[$k][1]; # print STDERR "scanning pattern:$pattern, highlight:($result)\n"; From ae3e57ae26cdcc85728bb566f999bcb9a7cc6954 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:40:51 +0800 Subject: [PATCH 143/291] usb: chipidea: imx: refine clock operations to adapt for all platforms Some i.mx platforms need three clocks to let controller work, but others only need one, refine clock operation to adapt for all platforms, it fixes a regression found at i.mx27. Signed-off-by: Peter Chen Tested-by: Fabio Estevam Cc: #v4.1+ --- drivers/usb/chipidea/ci_hdrc_imx.c | 133 ++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 19 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 6ccbf60cdd5c..7c9b63064ea0 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -84,6 +84,12 @@ struct ci_hdrc_imx_data { struct imx_usbmisc_data *usbmisc_data; bool supports_runtime_pm; bool in_lpm; + /* SoC before i.mx6 (except imx23/imx28) needs three clks */ + bool need_three_clks; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; + /* --------------------------------- */ }; /* Common functions shared by usbmisc drivers */ @@ -135,6 +141,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) } /* End of common functions shared by usbmisc drivers*/ +static int imx_get_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + data->clk_ipg = devm_clk_get(dev, "ipg"); + if (IS_ERR(data->clk_ipg)) { + /* If the platform only needs one clocks */ + data->clk = devm_clk_get(dev, NULL); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + dev_err(dev, + "Failed to get clks, err=%ld,%ld\n", + PTR_ERR(data->clk), PTR_ERR(data->clk_ipg)); + return ret; + } + return ret; + } + + data->clk_ahb = devm_clk_get(dev, "ahb"); + if (IS_ERR(data->clk_ahb)) { + ret = PTR_ERR(data->clk_ahb); + dev_err(dev, + "Failed to get ahb clock, err=%d\n", ret); + return ret; + } + + data->clk_per = devm_clk_get(dev, "per"); + if (IS_ERR(data->clk_per)) { + ret = PTR_ERR(data->clk_per); + dev_err(dev, + "Failed to get per clock, err=%d\n", ret); + return ret; + } + + data->need_three_clks = true; + return ret; +} + +static int imx_prepare_enable_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + if (data->need_three_clks) { + ret = clk_prepare_enable(data->clk_ipg); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ipg clk, err=%d\n", + ret); + return ret; + } + + ret = clk_prepare_enable(data->clk_ahb); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ahb clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + + ret = clk_prepare_enable(data->clk_per); + if (ret) { + dev_err(dev, + "Failed to prepare/enable per clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + } else { + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(dev, + "Failed to prepare/enable clk, err=%d\n", + ret); + return ret; + } + } + + return ret; +} + +static void imx_disable_unprepare_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + + if (data->need_three_clks) { + clk_disable_unprepare(data->clk_per); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + } else { + clk_disable_unprepare(data->clk); + } +} static int ci_hdrc_imx_probe(struct platform_device *pdev) { @@ -153,23 +255,18 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + platform_set_drvdata(pdev, data); data->usbmisc_data = usbmisc_get_init_data(&pdev->dev); if (IS_ERR(data->usbmisc_data)) return PTR_ERR(data->usbmisc_data); - data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, - "Failed to get clock, err=%ld\n", PTR_ERR(data->clk)); - return PTR_ERR(data->clk); - } - - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(&pdev->dev, - "Failed to prepare or enable clock, err=%d\n", ret); + ret = imx_get_clks(&pdev->dev); + if (ret) + return ret; + + ret = imx_prepare_enable_clks(&pdev->dev); + if (ret) return ret; - } data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { @@ -212,8 +309,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) goto disable_device; } - platform_set_drvdata(pdev, data); - if (data->supports_runtime_pm) { pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -226,7 +321,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); err_clk: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return ret; } @@ -240,7 +335,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); } ci_hdrc_remove_device(data->ci_pdev); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return 0; } @@ -252,7 +347,7 @@ static int imx_controller_suspend(struct device *dev) dev_dbg(dev, "at %s\n", __func__); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); data->in_lpm = true; return 0; @@ -270,7 +365,7 @@ static int imx_controller_resume(struct device *dev) return 0; } - ret = clk_prepare_enable(data->clk); + ret = imx_prepare_enable_clks(dev); if (ret) return ret; @@ -285,7 +380,7 @@ static int imx_controller_resume(struct device *dev) return 0; clk_disable: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); return ret; } From facf47ee6b4d07d43c3bfd6f0762f1b28f64703a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:35:06 +0800 Subject: [PATCH 144/291] ARM: dts: imx27.dtsi: change the clock information for usb For imx27, it needs three clocks to let the controller work, the old code is wrong, and usbmisc has not included clock handling code any more. Without this patch, it will cause below data abort when accessing usbmisc registers. usbcore: registered new interface driver usb-storage Unhandled fault: external abort on non-linefetch (0x008) at 0xf4424600 pgd = c0004000 [f4424600] *pgd=10000452(bad) Internal error: : 8 [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.1.0-next-20150701-dirty #3089 Hardware name: Freescale i.MX27 (Device Tree Support) task: c7832b60 ti: c783e000 task.ti: c783e000 PC is at usbmisc_imx27_init+0x4c/0xbc LR is at usbmisc_imx27_init+0x40/0xbc pc : [] lr : [] psr: 60000093 sp : c783fe08 ip : 00000000 fp : 00000000 r10: c0576434 r9 : 0000009c r8 : c7a773a0 r7 : 01000000 r6 : 60000013 r5 : c7a776f0 r4 : c7a773f0 r3 : f4424600 r2 : 00000000 r1 : 00000001 r0 : 00000001 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: a0004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc783e190) Stack: (0xc783fe08 to 0xc7840000) Signed-off-by: Peter Chen Reported-by: Fabio Estevam Tested-by: Fabio Estevam Cc: #v4.1+ Acked-by: Shawn Guo --- arch/arm/boot/dts/imx27.dtsi | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index feb9d34b239c..f818ea483aeb 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -486,7 +486,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024000 0x200>; interrupts = <56>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 0>; status = "disabled"; }; @@ -495,7 +498,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024200 0x200>; interrupts = <54>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 1>; dr_mode = "host"; status = "disabled"; @@ -505,7 +511,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024400 0x200>; interrupts = <55>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 2>; dr_mode = "host"; status = "disabled"; @@ -515,7 +524,6 @@ #index-cells = <1>; compatible = "fsl,imx27-usbmisc"; reg = <0x10024600 0x200>; - clocks = <&clks IMX27_CLK_USB_AHB_GATE>; }; sahara2: sahara@10025000 { From 251b3c8b57481bcecd3f753108e36e7389ce12ac Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 13 Oct 2015 18:23:31 +0800 Subject: [PATCH 145/291] usb: chipidea: debug: disable usb irq while role switch Since the ci->role will be set after the host role start is complete, there will be nobody cared irq during start host if usb irq enabled. This error can be reproduced on i.mx6 sololite EVK board by: 1. disable otg id irq(IDIE) and disable all real otg properties of usbotg1 in dts. 2. boot up the board with ID cable and usb device connected. 3. echo gadget > /sys/kernel/debug/ci_hdrc.0/role 4. echo host > /sys/kernel/debug/ci_hdrc.0/role 5. irq 212: nobody cared. Cc: # v3.10+ Signed-off-by: Li Jun Signed-off-by: Peter Chen --- drivers/usb/chipidea/debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 080b7be3daf0..58c8485a0715 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -322,8 +322,10 @@ static ssize_t ci_role_write(struct file *file, const char __user *ubuf, return -EINVAL; pm_runtime_get_sync(ci->dev); + disable_irq(ci->irq); ci_role_stop(ci); ret = ci_role_start(ci, role); + enable_irq(ci->irq); pm_runtime_put_sync(ci->dev); return ret ? ret : count; From 85da852df66e5e0d3aba761b0fece7c958ff0685 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 12 Dec 2014 09:11:42 +0800 Subject: [PATCH 146/291] usb: chipidea: otg: gadget module load and unload support This patch is to support load and unload gadget driver in full OTG mode. Signed-off-by: Li Jun Signed-off-by: Peter Chen Tested-by: Jiada Wang Cc: #v4.0+ --- drivers/usb/chipidea/udc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8223fe73ea85..391a1225b0ba 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1751,6 +1751,22 @@ static int ci_udc_start(struct usb_gadget *gadget, return retval; } +static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci) +{ + if (!ci_otg_is_fsm_mode(ci)) + return; + + mutex_lock(&ci->fsm.lock); + if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) { + ci->fsm.a_bidl_adis_tmout = 1; + ci_hdrc_otg_fsm_start(ci); + } else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) { + ci->fsm.protocol = PROTO_UNDEF; + ci->fsm.otg->state = OTG_STATE_UNDEFINED; + } + mutex_unlock(&ci->fsm.lock); +} + /** * ci_udc_stop: unregister a gadget driver */ @@ -1775,6 +1791,7 @@ static int ci_udc_stop(struct usb_gadget *gadget) ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); + ci_udc_stop_for_otg_fsm(ci); return 0; } From 090bc267ea1013bbb33778b343b4acd78b9c5d9b Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 12 Nov 2015 08:43:33 +0100 Subject: [PATCH 147/291] usb: chipidea: usbmisc_imx: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later. Renaming tmp_dev to of_id (like all others do) in the process. Reported-by: coverity (CID 1324135) Signed-off-by: LABBE Corentin Signed-off-by: Peter Chen --- drivers/usb/chipidea/usbmisc_imx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index fcea4eb36eee..ab8b027e8cc8 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -500,7 +500,11 @@ static int usbmisc_imx_probe(struct platform_device *pdev) { struct resource *res; struct imx_usbmisc *data; - struct of_device_id *tmp_dev; + const struct of_device_id *of_id; + + of_id = of_match_device(usbmisc_imx_dt_ids, &pdev->dev); + if (!of_id) + return -ENODEV; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) @@ -513,9 +517,7 @@ static int usbmisc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->base)) return PTR_ERR(data->base); - tmp_dev = (struct of_device_id *) - of_match_device(usbmisc_imx_dt_ids, &pdev->dev); - data->ops = (const struct usbmisc_ops *)tmp_dev->data; + data->ops = (const struct usbmisc_ops *)of_id->data; platform_set_drvdata(pdev, data); return 0; From 6f51bc340d2a1c71a2409f80f3e60fe2c44e35ae Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 12 Nov 2015 08:43:34 +0100 Subject: [PATCH 148/291] usb: chipidea: imx: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later. Reported-by: coverity (CID 1324138) Signed-off-by: LABBE Corentin Signed-off-by: Peter Chen --- drivers/usb/chipidea/ci_hdrc_imx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 7c9b63064ea0..5a048b7b92e8 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -247,9 +247,14 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) .flags = CI_HDRC_SET_NON_ZERO_TTHA, }; int ret; - const struct of_device_id *of_id = - of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev); - const struct ci_hdrc_imx_platform_flag *imx_platform_flag = of_id->data; + const struct of_device_id *of_id; + const struct ci_hdrc_imx_platform_flag *imx_platform_flag; + + of_id = of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev); + if (!of_id) + return -ENODEV; + + imx_platform_flag = of_id->data; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) From 0f94592efd36213c961145fe1ab0c3bc323ec053 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 17 Nov 2015 18:14:26 +0200 Subject: [PATCH 149/291] drm/i915: Fix gpu frequency change tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gen < 9 we have had always 50Mhz units as our hw ratio. With gen >= 9 the hw ratio changed to 16.667Mhz (50/3). The result was that our gpu frequency tracing started to output values 3 times larger than expected due to hardcoded scaling value. Fix this by using Use intel_gpu_freq() when generating Mhz value from ratio for 'intel_gpu_freq_change' trace event. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92591 Cc: stable@vger.kernel.org # v4.3+ Reported-by: Eero Tamminen Signed-off-by: Mika Kuoppala Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447776866-29384-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7b47da805685..071a76b9ac52 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4449,7 +4449,7 @@ static void gen6_set_rps(struct drm_device *dev, u8 val) POSTING_READ(GEN6_RPNSWREQ); dev_priv->rps.cur_freq = val; - trace_intel_gpu_freq_change(val * 50); + trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); } static void valleyview_set_rps(struct drm_device *dev, u8 val) From 65da0a8e34a857f2ba9ccb91dc8f8f964cf938b7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Nov 2015 09:53:31 +0100 Subject: [PATCH 150/291] arm64: use non-global mappings for UEFI runtime regions As pointed out by Russell King in response to the proposed ARM version of this code, the sequence to switch between the UEFI runtime mapping and current's actual userland mapping (and vice versa) is potentially unsafe, since it leaves a time window between the switch to the new page tables and the TLB flush where speculative accesses may hit on stale global TLB entries. So instead, use non-global mappings, and perform the switch via the ordinary ASID-aware context switch routines. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/kernel/efi.c | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index c0e87898ba96..24165784b803 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -101,7 +101,7 @@ static inline void cpu_set_default_tcr_t0sz(void) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); -#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) /* * This is called when "tsk" is about to enter lazy TLB mode. diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index de46b50f4cdf..fc5508e0df57 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -224,6 +224,8 @@ static bool __init efi_virtmap_init(void) { efi_memory_desc_t *md; + init_new_context(NULL, &efi_mm); + for_each_efi_memory_desc(&memmap, md) { u64 paddr, npages, size; pgprot_t prot; @@ -254,7 +256,8 @@ static bool __init efi_virtmap_init(void) else prot = PAGE_KERNEL; - create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, + __pgprot(pgprot_val(prot) | PTE_NG)); } return true; } @@ -329,14 +332,7 @@ core_initcall(arm64_dmi_init); static void efi_set_pgd(struct mm_struct *mm) { - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); - - local_flush_tlb_all(); - if (icache_is_aivivt()) - __local_flush_icache_all(); + switch_mm(NULL, mm, NULL); } void efi_virtmap_load(void) From 002374f371bd02df864cce1fe85d90dc5b292837 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:18 +0000 Subject: [PATCH 151/291] MIPS: KVM: Fix ASID restoration logic ASID restoration on guest resume should determine the guest execution mode based on the guest Status register rather than bit 30 of the guest PC. Fix the two places in locore.S that do this, loading the guest status from the cop0 area. Note, this assembly is specific to the trap & emulate implementation of KVM, so it doesn't need to check the supervisor bit as that mode is not implemented in the guest. Fixes: b680f70fc111 ("KVM/MIPS32: Entry point for trampolining to...") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/locore.S | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 7bab3a4e8f7d..7e2210846b8b 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -157,9 +157,11 @@ FEXPORT(__kvm_mips_vcpu_run) FEXPORT(__kvm_mips_load_asid) /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: @@ -474,9 +476,11 @@ __kvm_mips_return_to_guest: mtc0 t0, CP0_EPC /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: From c5c2a3b998f1ff5a586f9d37e154070b8d550d17 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:19 +0000 Subject: [PATCH 152/291] MIPS: KVM: Fix CACHE immediate offset sign extension The immediate field of the CACHE instruction is signed, so ensure that it gets sign extended by casting it to an int16_t rather than just masking the low 16 bits. Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d5fa3eaf39a1..41b1b090f56f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, base = (inst >> 21) & 0x1f; op_inst = (inst >> 16) & 0x1f; - offset = inst & 0xffff; + offset = (int16_t)inst; cache = (inst >> 16) & 0x3; op = (inst >> 18) & 0x7; From 585bb8f9a5e592f2ce7abbe5ed3112d5438d2754 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:20 +0000 Subject: [PATCH 153/291] MIPS: KVM: Uninit VCPU in vcpu_create error path If either of the memory allocations in kvm_arch_vcpu_create() fail, the vcpu which has been allocated and kvm_vcpu_init'd doesn't get uninit'd in the error handling path. Add a call to kvm_vcpu_uninit() to fix this. Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 49ff3bfc007e..b9b803facdbf 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -279,7 +279,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_free_cpu; + goto out_uninit_cpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -343,6 +343,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) out_free_gebase: kfree(gebase); +out_uninit_cpu: + kvm_vcpu_uninit(vcpu); + out_free_cpu: kfree(vcpu); From 127a457acb2131fdb31c68c98cf11eda8ba7b380 Mon Sep 17 00:00:00 2001 From: Matt Gingell Date: Tue, 17 Nov 2015 17:32:05 +0100 Subject: [PATCH 154/291] KVM: x86: fix interrupt window handling in split IRQ chip case This patch ensures that dm_request_for_irq_injection and post_kvm_run_save are in sync, avoiding that an endless ping-pong between userspace (who correctly notices that IF=0) and the kernel (who insists that userspace handles its request for the interrupt window). To synchronize them, it also adds checks for kvm_arch_interrupt_allowed and !kvm_event_needs_reinjection. These are always needed, not just for in-kernel LAPIC. Signed-off-by: Matt Gingell [A collage of two patches from Matt. - Paolo] Fixes: 1c1a9ce973a7863dd46767226bce2a5f12d48bc6 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00462bd63129..46ed8edad793 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2763,6 +2763,12 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, return 0; } +static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) +{ + return (!lapic_in_kernel(vcpu) || + kvm_apic_accept_pic_intr(vcpu)); +} + static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { @@ -5921,12 +5927,16 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm)) return false; + if (!kvm_arch_interrupt_allowed(vcpu)) + return false; + if (kvm_cpu_has_interrupt(vcpu)) return false; - return (irqchip_split(vcpu->kvm) - ? kvm_apic_accept_pic_intr(vcpu) - : kvm_arch_interrupt_allowed(vcpu)); + if (kvm_event_needs_reinjection(vcpu)) + return false; + + return kvm_cpu_accept_dm_intr(vcpu); } static void post_kvm_run_save(struct kvm_vcpu *vcpu) @@ -5937,17 +5947,12 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (!irqchip_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = - kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu); - else if (!pic_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = - kvm_apic_accept_pic_intr(vcpu) && - !kvm_cpu_has_interrupt(vcpu); - else - kvm_run->ready_for_interrupt_injection = 1; + kvm_run->ready_for_interrupt_injection = + pic_in_kernel(vcpu->kvm) || + (kvm_arch_interrupt_allowed(vcpu) && + !kvm_cpu_has_interrupt(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu)); } static void update_cr8_intercept(struct kvm_vcpu *vcpu) From 782d422bcaee4680c640fbc8ce8c45524fd11790 Mon Sep 17 00:00:00 2001 From: Matt Gingell Date: Mon, 16 Nov 2015 15:26:00 -0800 Subject: [PATCH 155/291] KVM: x86: split kvm_vcpu_ready_for_interrupt_injection out of dm_request_for_irq_injection This patch breaks out a new function kvm_vcpu_ready_for_interrupt_injection. This routine encapsulates the logic required to determine whether a vcpu is ready to accept an interrupt injection, which is now required on multiple paths. Reviewed-by: Steve Rutherford Signed-off-by: Matt Gingell Fixes: 1c1a9ce973a7863dd46767226bce2a5f12d48bc6 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 46ed8edad793..32f6b760682c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2769,6 +2769,20 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) kvm_apic_accept_pic_intr(vcpu)); } +/* + * if userspace requested an interrupt window, check that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) +{ + return kvm_arch_interrupt_allowed(vcpu) && + !kvm_cpu_has_interrupt(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); +} + static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { @@ -5916,27 +5930,10 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); } -/* - * Check if userspace requested an interrupt window, and that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { - if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm)) - return false; - - if (!kvm_arch_interrupt_allowed(vcpu)) - return false; - - if (kvm_cpu_has_interrupt(vcpu)) - return false; - - if (kvm_event_needs_reinjection(vcpu)) - return false; - - return kvm_cpu_accept_dm_intr(vcpu); + return vcpu->run->request_interrupt_window && + likely(!pic_in_kernel(vcpu->kvm)); } static void post_kvm_run_save(struct kvm_vcpu *vcpu) @@ -5949,10 +5946,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->apic_base = kvm_get_apic_base(vcpu); kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || - (kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && - kvm_cpu_accept_dm_intr(vcpu)); + kvm_vcpu_ready_for_interrupt_injection(vcpu); } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -6668,7 +6662,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); - if (dm_request_for_irq_injection(vcpu)) { + if (dm_request_for_irq_injection(vcpu) && + kvm_vcpu_ready_for_interrupt_injection(vcpu)) { r = 0; vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++vcpu->stat.request_irq_exits; From 934bf65354227981df15bbc755d33f4ba3443ff2 Mon Sep 17 00:00:00 2001 From: Matt Gingell Date: Mon, 16 Nov 2015 15:26:05 -0800 Subject: [PATCH 156/291] KVM: x86: set KVM_REQ_EVENT on local interrupt request from user space Set KVM_REQ_EVENT when a PIC in user space injects a local interrupt. Currently a request is only made when neither the PIC nor the APIC is in the kernel, which is not sufficient in the split IRQ chip case. This addresses a problem in QEMU where interrupts are delayed until another path invokes the event loop. Reviewed-by: Steve Rutherford Signed-off-by: Matt Gingell Fixes: 1c1a9ce973a7863dd46767226bce2a5f12d48bc6 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 32f6b760682c..f254e296d368 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2806,6 +2806,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return -EEXIST; vcpu->arch.pending_external_vector = irq->irq; + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } From 62a193edaf90df38356e292f47a17f28e0cee3f1 Mon Sep 17 00:00:00 2001 From: Matt Gingell Date: Mon, 16 Nov 2015 15:26:07 -0800 Subject: [PATCH 157/291] KVM: x86: request interrupt window when IRQ chip is split Before this patch, we incorrectly enter the guest without requesting an interrupt window if the IRQ chip is split between user space and the kernel. Because lapic_in_kernel no longer implies the PIC is in the kernel, this patch tests pic_in_kernel to determining whether an interrupt window should be requested when entering the guest. If the APIC is in the kernel and we request an interrupt window the guest will return immediately. If the APIC is masked the guest will not not make forward progress and unmask it, leading to a loop when KVM reenters and requests again. This patch adds a check to ensure the APIC is ready to accept an interrupt before requesting a window. Reviewed-by: Steve Rutherford Signed-off-by: Matt Gingell [Use the other newly introduced functions. - Paolo] Fixes: 1c1a9ce973a7863dd46767226bce2a5f12d48bc6 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f254e296d368..eed32283d22c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6360,8 +6360,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; - bool req_int_win = !lapic_in_kernel(vcpu) && - vcpu->run->request_interrupt_window; + bool req_int_win = + dm_request_for_irq_injection(vcpu) && + kvm_cpu_accept_dm_intr(vcpu); + bool req_immediate_exit = false; if (vcpu->requests) { From 08c6781cfaa196d4b257ec043c17e8746d9284e3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Nov 2015 13:12:48 +0100 Subject: [PATCH 158/291] arm64: crypto: reduce priority of core AES cipher The asynchronous, merged implementations of AES in CBC, CTR and XTS modes are preferred when available (i.e., when instantiating ablkciphers explicitly). However, the synchronous core AES cipher combined with the generic CBC mode implementation will produce a 'cbc(aes)' blkcipher that is callable asynchronously as well. To prevent this implementation from being used when the accelerated asynchronous implemenation is also available, lower its priority to 250 (i.e., below the asynchronous module's priority of 300). Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/crypto/aes-ce-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index ce47792a983d..f7bd9bf0bbb3 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -237,7 +237,7 @@ EXPORT_SYMBOL(ce_aes_setkey); static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-ce", - .cra_priority = 300, + .cra_priority = 250, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), From 0b2aa5b80bbf4d0fb5daa1fb83ff637daa12d552 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 12 Nov 2015 12:21:10 -0800 Subject: [PATCH 159/291] arm64: Fix R/O permissions in mark_rodata_ro The permissions in mark_rodata_ro trigger a build error with STRICT_MM_TYPECHECKS. Fix this by introducing PAGE_KERNEL_ROX for the same reasons as PAGE_KERNEL_RO. From Ard: "PAGE_KERNEL_EXEC has PTE_WRITE set as well, making the range writeable under the ARMv8.1 DBM feature, that manages the dirty bit in hardware (writing to a page with the PTE_RDONLY and PTE_WRITE bits both set will clear the PTE_RDONLY bit in that case)" Signed-off-by: Laura Abbott Acked-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/mm/mmu.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9819a9426b69..7e074f93f383 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -81,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 32ddd893da9a..abb66f84d4ac 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -475,7 +475,7 @@ void mark_rodata_ro(void) { create_mapping_late(__pa(_stext), (unsigned long)_stext, (unsigned long)_etext - (unsigned long)_stext, - PAGE_KERNEL_EXEC | PTE_RDONLY); + PAGE_KERNEL_ROX); } #endif From 00490c22b1b59b168d89de798f0790bfd9541515 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 16 Nov 2015 14:42:12 +0100 Subject: [PATCH 160/291] drm/i915: Consider SPLL as another shared pll, v2. When diagnosing a unrelated bug for someone on irc, it would seem the hardware can be brought up by the BIOS with the embedded displayport using the SPLL for spread spectrum. Right now this is not handled well in i915, and it calculates the crtc needs to be reprogrammed on the first modeset without SSC, but the SPLL itself was kept active. Fix this by exposing SPLL as a shared pll that will not be returned by intel_get_shared_dpll; you have to know it exists to use it. Changes since v1: - Create a separate dpll_hw_state.spll for spll, and use separate pll functions for spll. Tested-by: Emil Renner Berthing Tested-by: Gabriel Feceoru Reviewed-by: Daniel Vetter Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1447681332-6318-1-git-send-email-maarten.lankhorst@linux.intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/intel_crt.c | 31 ++---------- drivers/gpu/drm/i915/intel_ddi.c | 75 ++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_display.c | 15 ++++-- 4 files changed, 83 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8afda459a26e..84b5db257f1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -351,6 +351,8 @@ enum intel_dpll_id { /* hsw/bdw */ DPLL_ID_WRPLL1 = 0, DPLL_ID_WRPLL2 = 1, + DPLL_ID_SPLL = 2, + /* skl */ DPLL_ID_SKL_DPLL1 = 0, DPLL_ID_SKL_DPLL2 = 1, @@ -367,6 +369,7 @@ struct intel_dpll_hw_state { /* hsw, bdw */ uint32_t wrpll; + uint32_t spll; /* skl */ /* diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index b84aaa0bb48a..6a2c76e367a5 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -138,18 +138,6 @@ static void hsw_crt_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder); } -static void hsw_crt_pre_enable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL already enabled\n"); - I915_WRITE(SPLL_CTL, - SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC); - POSTING_READ(SPLL_CTL); - udelay(20); -} - /* Note: The caller is required to filter out dpms modes not supported by the * platform. */ static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode) @@ -216,19 +204,6 @@ static void pch_post_disable_crt(struct intel_encoder *encoder) intel_disable_crt(encoder); } -static void hsw_crt_post_disable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t val; - - DRM_DEBUG_KMS("Disabling SPLL\n"); - val = I915_READ(SPLL_CTL); - WARN_ON(!(val & SPLL_PLL_ENABLE)); - I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); - POSTING_READ(SPLL_CTL); -} - static void intel_enable_crt(struct intel_encoder *encoder) { struct intel_crt *crt = intel_encoder_to_crt(encoder); @@ -280,6 +255,10 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, if (HAS_DDI(dev)) { pipe_config->ddi_pll_sel = PORT_CLK_SEL_SPLL; pipe_config->port_clock = 135000 * 2; + + pipe_config->dpll_hw_state.wrpll = 0; + pipe_config->dpll_hw_state.spll = + SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC; } return true; @@ -860,8 +839,6 @@ void intel_crt_init(struct drm_device *dev) if (HAS_DDI(dev)) { crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; - crt->base.pre_enable = hsw_crt_pre_enable; - crt->base.post_disable = hsw_crt_post_disable; } else { crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b25e99a432fb..a6752a61d99f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1286,6 +1286,18 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc, } crtc_state->ddi_pll_sel = PORT_CLK_SEL_WRPLL(pll->id); + } else if (crtc_state->ddi_pll_sel == PORT_CLK_SEL_SPLL) { + struct drm_atomic_state *state = crtc_state->base.state; + struct intel_shared_dpll_config *spll = + &intel_atomic_get_shared_dpll_state(state)[DPLL_ID_SPLL]; + + if (spll->crtc_mask && + WARN_ON(spll->hw_state.spll != crtc_state->dpll_hw_state.spll)) + return false; + + crtc_state->shared_dpll = DPLL_ID_SPLL; + spll->hw_state.spll = crtc_state->dpll_hw_state.spll; + spll->crtc_mask |= 1 << intel_crtc->pipe; } return true; @@ -2437,7 +2449,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) } } -static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv, +static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { I915_WRITE(WRPLL_CTL(pll->id), pll->config.hw_state.wrpll); @@ -2445,8 +2457,16 @@ static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv, udelay(20); } -static void hsw_ddi_pll_disable(struct drm_i915_private *dev_priv, +static void hsw_ddi_spll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) +{ + I915_WRITE(SPLL_CTL, pll->config.hw_state.spll); + POSTING_READ(SPLL_CTL); + udelay(20); +} + +static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) { uint32_t val; @@ -2455,9 +2475,19 @@ static void hsw_ddi_pll_disable(struct drm_i915_private *dev_priv, POSTING_READ(WRPLL_CTL(pll->id)); } -static bool hsw_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll, - struct intel_dpll_hw_state *hw_state) +static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + uint32_t val; + + val = I915_READ(SPLL_CTL); + I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); + POSTING_READ(SPLL_CTL); +} + +static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) { uint32_t val; @@ -2470,25 +2500,50 @@ static bool hsw_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, return val & WRPLL_PLL_ENABLE; } +static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + uint32_t val; + + if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + val = I915_READ(SPLL_CTL); + hw_state->spll = val; + + return val & SPLL_PLL_ENABLE; +} + + static const char * const hsw_ddi_pll_names[] = { "WRPLL 1", "WRPLL 2", + "SPLL" }; static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv) { int i; - dev_priv->num_shared_dpll = 2; + dev_priv->num_shared_dpll = 3; - for (i = 0; i < dev_priv->num_shared_dpll; i++) { + for (i = 0; i < 2; i++) { dev_priv->shared_dplls[i].id = i; dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i]; - dev_priv->shared_dplls[i].disable = hsw_ddi_pll_disable; - dev_priv->shared_dplls[i].enable = hsw_ddi_pll_enable; + dev_priv->shared_dplls[i].disable = hsw_ddi_wrpll_disable; + dev_priv->shared_dplls[i].enable = hsw_ddi_wrpll_enable; dev_priv->shared_dplls[i].get_hw_state = - hsw_ddi_pll_get_hw_state; + hsw_ddi_wrpll_get_hw_state; } + + /* SPLL is special, but needs to be initialized anyway.. */ + dev_priv->shared_dplls[i].id = i; + dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i]; + dev_priv->shared_dplls[i].disable = hsw_ddi_spll_disable; + dev_priv->shared_dplls[i].enable = hsw_ddi_spll_enable; + dev_priv->shared_dplls[i].get_hw_state = hsw_ddi_spll_get_hw_state; + } static const char * const skl_ddi_pll_names[] = { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f80bbeb1313..002e2a41ce1b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4239,6 +4239,7 @@ struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc, struct intel_shared_dpll *pll; struct intel_shared_dpll_config *shared_dpll; enum intel_dpll_id i; + int max = dev_priv->num_shared_dpll; shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state); @@ -4273,9 +4274,11 @@ struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc, WARN_ON(shared_dpll[i].crtc_mask); goto found; - } + } else if (INTEL_INFO(dev_priv)->gen < 9 && HAS_DDI(dev_priv)) + /* Do not consider SPLL */ + max = 2; - for (i = 0; i < dev_priv->num_shared_dpll; i++) { + for (i = 0; i < max; i++) { pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ @@ -9725,6 +9728,8 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, case PORT_CLK_SEL_WRPLL2: pipe_config->shared_dpll = DPLL_ID_WRPLL2; break; + case PORT_CLK_SEL_SPLL: + pipe_config->shared_dpll = DPLL_ID_SPLL; } } @@ -12005,9 +12010,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->dpll_hw_state.cfgcr1, pipe_config->dpll_hw_state.cfgcr2); } else if (HAS_DDI(dev)) { - DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n", + DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n", pipe_config->ddi_pll_sel, - pipe_config->dpll_hw_state.wrpll); + pipe_config->dpll_hw_state.wrpll, + pipe_config->dpll_hw_state.spll); } else { DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, " "fp0: 0x%x, fp1: 0x%x\n", @@ -12530,6 +12536,7 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_X(dpll_hw_state.fp0); PIPE_CONF_CHECK_X(dpll_hw_state.fp1); PIPE_CONF_CHECK_X(dpll_hw_state.wrpll); + PIPE_CONF_CHECK_X(dpll_hw_state.spll); PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); From 2269a395790dd1ed3ae6e7708d76731e92fd46ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 17 Nov 2015 11:02:47 +0100 Subject: [PATCH 161/291] drm/amdgpu: fix typo in firmware name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index b93b649d9e36..98ee39001c45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -40,7 +40,7 @@ static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); -MODULE_FIRMWARE("radeon/boniare_mc.bin"); +MODULE_FIRMWARE("radeon/bonaire_mc.bin"); MODULE_FIRMWARE("radeon/hawaii_mc.bin"); /** From 984810fc45389c545719fbb4219e8a12b27032a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 14 Nov 2015 21:05:35 +0100 Subject: [PATCH 162/291] drm/amdgpu: cleanup scheduler command submission MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify the two code path again, cause they do pretty much the same thing. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 63 ++++++++++---------------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0f187027c753..0ce5c1e45ec8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1210,6 +1210,7 @@ struct amdgpu_cs_parser { /* relocations */ struct amdgpu_bo_list_entry *vm_bos; struct list_head validated; + struct fence *fence; struct amdgpu_ib *ibs; uint32_t num_ibs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6096effd6a56..3afcf0237c25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -439,8 +439,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a, return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; } -static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff) +/** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. + * @error: error number + * + * If error is set than unvalidate buffer, otherwise just free memory + * used by parsing context. + **/ +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) { + unsigned i; + if (!error) { /* Sort the buffer list from the smallest to largest buffer, * which affects the order of buffers in the LRU list. @@ -455,17 +465,13 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err list_sort(NULL, &parser->validated, cmp_size_smaller_first); ttm_eu_fence_buffer_objects(&parser->ticket, - &parser->validated, - &parser->ibs[parser->num_ibs-1].fence->base); + &parser->validated, + parser->fence); } else if (backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); } -} - -static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) -{ - unsigned i; + fence_put(parser->fence); if (parser->ctx) amdgpu_ctx_put(parser->ctx); @@ -484,20 +490,6 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); } -/** - * cs_parser_fini() - clean parser states - * @parser: parser structure holding parsing context. - * @error: error number - * - * If error is set than unvalidate buffer, otherwise just free memory - * used by parsing context. - **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) -{ - amdgpu_cs_parser_fini_early(parser, error, backoff); - amdgpu_cs_parser_fini_late(parser); -} - static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm) { @@ -582,15 +574,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } r = amdgpu_bo_vm_update_pte(parser, vm); - if (r) { - goto out; - } - amdgpu_cs_sync_rings(parser); - if (!amdgpu_enable_scheduler) - r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs, - parser->filp); + if (!r) + amdgpu_cs_sync_rings(parser); -out: return r; } @@ -881,7 +867,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } job->base.s_fence = fence; - fence_get(&fence->base); + parser.fence = fence_get(&fence->base); cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, &fence->base); @@ -890,17 +876,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); - list_sort(NULL, &parser.validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated, - &fence->base); - fence_put(&fence->base); + } else { + struct amdgpu_fence *fence; - amdgpu_cs_parser_fini_late(&parser); - mutex_unlock(&vm->mutex); - return 0; + r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs, + parser.filp); + fence = parser.ibs[parser.num_ibs - 1].fence; + parser.fence = fence_get(&fence->base); + cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; } - cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); mutex_unlock(&vm->mutex); From eeed25ab83bbd68b2c9dba3cc2ce5913537594f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 15 Nov 2015 15:50:07 +0100 Subject: [PATCH 163/291] drm/amdgpu: remove unused VM manager field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0ce5c1e45ec8..819d1473de30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -967,8 +967,6 @@ struct amdgpu_vm_manager { u64 vram_base_offset; /* is vm enabled? */ bool enabled; - /* for hw to save the PD addr on suspend/resume */ - uint32_t saved_table_addr[AMDGPU_NUM_VM]; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct amdgpu_ring *vm_pte_funcs_ring; From 8b4fb00b5db271bb1e117a078d5d7a161d8d8af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 15 Nov 2015 16:04:16 +0100 Subject: [PATCH 164/291] drm/amdgpu: cleanup VM coding style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the indentation and move the VM functions to the structures. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Junwei Zhang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 98 ++++++++++++++--------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 819d1473de30..d1e16726e225 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -916,8 +916,8 @@ struct amdgpu_ring { #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 struct amdgpu_vm_pt { - struct amdgpu_bo *bo; - uint64_t addr; + struct amdgpu_bo *bo; + uint64_t addr; }; struct amdgpu_vm_id { @@ -959,19 +959,60 @@ struct amdgpu_vm { }; struct amdgpu_vm_manager { - struct fence *active[AMDGPU_NUM_VM]; - uint32_t max_pfn; + struct fence *active[AMDGPU_NUM_VM]; + uint32_t max_pfn; /* number of VMIDs */ - unsigned nvm; + unsigned nvm; /* vram base address for page table entry */ - u64 vram_base_offset; + u64 vram_base_offset; /* is vm enabled? */ - bool enabled; + bool enabled; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct amdgpu_ring *vm_pte_funcs_ring; }; +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); +struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct list_head *head); +int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync); +void amdgpu_vm_flush(struct amdgpu_ring *ring, + struct amdgpu_vm *vm, + struct fence *updates); +void amdgpu_vm_fence(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct fence *fence); +uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); +int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +int amdgpu_vm_clear_freed(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync); +int amdgpu_vm_bo_update(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + struct ttm_mem_reg *mem); +void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, + struct amdgpu_bo *bo); +struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, + struct amdgpu_bo *bo); +struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo); +int amdgpu_vm_bo_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t addr, uint64_t offset, + uint64_t size, uint32_t flags); +int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t addr); +void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va); +int amdgpu_vm_free_job(struct amdgpu_job *job); + /* * context related structures */ @@ -2311,49 +2352,6 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -/* - * vm - */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); -void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct list_head *head); -int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync); -void amdgpu_vm_flush(struct amdgpu_ring *ring, - struct amdgpu_vm *vm, - struct fence *updates); -void amdgpu_vm_fence(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct fence *fence); -uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, - struct amdgpu_vm *vm); -int amdgpu_vm_clear_freed(struct amdgpu_device *adev, - struct amdgpu_vm *vm); -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_sync *sync); -int amdgpu_vm_bo_update(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, - struct ttm_mem_reg *mem); -void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo); -struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, - struct amdgpu_bo *bo); -struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo); -int amdgpu_vm_bo_map(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, - uint64_t addr, uint64_t offset, - uint64_t size, uint32_t flags); -int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va, - uint64_t addr); -void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, - struct amdgpu_bo_va *bo_va); -int amdgpu_vm_free_job(struct amdgpu_job *job); /* * functions used by amdgpu_encoder.c */ From ea89f8c9e8ba8a7b75446eef36917da50d2186d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 15 Nov 2015 20:52:06 +0100 Subject: [PATCH 165/291] drm/amdgpu: move VM manager clean into the VM code again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not a good idea to duplicate that code. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 8 ++------ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 8 ++------ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d1e16726e225..beb74854a8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -972,6 +972,7 @@ struct amdgpu_vm_manager { struct amdgpu_ring *vm_pte_funcs_ring; }; +void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a9fcc995d27e..6bb209bc0d36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1318,3 +1318,18 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) mutex_destroy(&vm->mutex); } + +/** + * amdgpu_vm_manager_fini - cleanup VM manager + * + * @adev: amdgpu_device pointer + * + * Cleanup the VM manager and free resources. + */ +void amdgpu_vm_manager_fini(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < AMDGPU_NUM_VM; ++i) + fence_put(adev->vm_manager.active[i]); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 98ee39001c45..7427d8cd4c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -961,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle) static int gmc_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->vm_manager.enabled) { - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.active[i]); + amdgpu_vm_manager_fini(adev); gmc_v7_0_vm_fini(adev); adev->vm_manager.enabled = false; } @@ -1011,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle) static int gmc_v7_0_suspend(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->vm_manager.enabled) { - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.active[i]); + amdgpu_vm_manager_fini(adev); gmc_v7_0_vm_fini(adev); adev->vm_manager.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index c9209b45d901..cb0e50ebb528 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -980,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle) static int gmc_v8_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->vm_manager.enabled) { - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.active[i]); + amdgpu_vm_manager_fini(adev); gmc_v8_0_vm_fini(adev); adev->vm_manager.enabled = false; } @@ -1032,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle) static int gmc_v8_0_suspend(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->vm_manager.enabled) { - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.active[i]); + amdgpu_vm_manager_fini(adev); gmc_v8_0_vm_fini(adev); adev->vm_manager.enabled = false; } From 1c16c0a7b26c6c905dc79c4194135ca2f360f0f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sat, 14 Nov 2015 21:31:40 +0100 Subject: [PATCH 166/291] drm/amdgpu: keep the owner for VMIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need the last VM use any more, keep the owner directly. Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 +++++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index beb74854a8a3..a5692624070a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -925,8 +925,6 @@ struct amdgpu_vm_id { uint64_t pd_gpu_addr; /* last flushed PD/PT update */ struct fence *flushed_updates; - /* last use of vmid */ - struct fence *last_id_use; }; struct amdgpu_vm { @@ -959,7 +957,11 @@ struct amdgpu_vm { }; struct amdgpu_vm_manager { - struct fence *active[AMDGPU_NUM_VM]; + struct { + struct fence *active; + atomic_long_t owner; + } ids[AMDGPU_NUM_VM]; + uint32_t max_pfn; /* number of VMIDs */ unsigned nvm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6bb209bc0d36..0bdbb2480f9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -143,10 +143,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; /* check if the id is still valid */ - if (vm_id->id && vm_id->last_id_use && - vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) { - trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); - return 0; + if (vm_id->id) { + unsigned id = vm_id->id; + long owner; + + owner = atomic_long_read(&adev->vm_manager.ids[id].owner); + if (owner == (long)vm) { + trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); + return 0; + } } /* we definately need to flush */ @@ -154,7 +159,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* skip over VMID 0, since it is the system VM */ for (i = 1; i < adev->vm_manager.nvm; ++i) { - struct fence *fence = adev->vm_manager.active[i]; + struct fence *fence = adev->vm_manager.ids[i].active; struct amdgpu_ring *fring; if (fence == NULL) { @@ -176,7 +181,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (choices[i]) { struct fence *fence; - fence = adev->vm_manager.active[choices[i]]; + fence = adev->vm_manager.ids[choices[i]].active; vm_id->id = choices[i]; trace_amdgpu_vm_grab_id(choices[i], ring->idx); @@ -246,11 +251,9 @@ void amdgpu_vm_fence(struct amdgpu_device *adev, struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence); unsigned vm_id = vm->ids[ring->idx].id; - fence_put(adev->vm_manager.active[vm_id]); - adev->vm_manager.active[vm_id] = fence_get(fence); - - fence_put(vm->ids[ring->idx].last_id_use); - vm->ids[ring->idx].last_id_use = fence_get(fence); + fence_put(adev->vm_manager.ids[vm_id].active); + adev->vm_manager.ids[vm_id].active = fence_get(fence); + atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm); } /** @@ -1238,7 +1241,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { vm->ids[i].id = 0; vm->ids[i].flushed_updates = NULL; - vm->ids[i].last_id_use = NULL; } mutex_init(&vm->mutex); vm->va = RB_ROOT; @@ -1312,8 +1314,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) fence_put(vm->page_directory_fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + unsigned id = vm->ids[i].id; + + atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner, + (long)vm, 0); fence_put(vm->ids[i].flushed_updates); - fence_put(vm->ids[i].last_id_use); } mutex_destroy(&vm->mutex); @@ -1331,5 +1336,5 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) unsigned i; for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.active[i]); + fence_put(adev->vm_manager.ids[i].active); } From c25867dfabf045a8148fd179fa759bb17f670e42 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 13 Nov 2015 13:32:01 +0800 Subject: [PATCH 167/291] drm/amdgpu: add lock for interval tree in vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I62b892a22af37b32e6b4aefca80a25cf45426ed2 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a5692624070a..306f75700bf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -954,6 +954,8 @@ struct amdgpu_vm { /* for id and flush management per ring */ struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; + /* for interval tree */ + spinlock_t it_lock; }; struct amdgpu_vm_manager { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0bdbb2480f9b..0513f3fed2c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1028,7 +1028,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; + spin_lock(&vm->it_lock); it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1); + spin_unlock(&vm->it_lock); if (it) { struct amdgpu_bo_va_mapping *tmp; tmp = container_of(it, struct amdgpu_bo_va_mapping, it); @@ -1055,7 +1057,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mapping->flags = flags; list_add(&mapping->list, &bo_va->invalids); + spin_lock(&vm->it_lock); interval_tree_insert(&mapping->it, &vm->va); + spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_map(bo_va, mapping); /* Make sure the page tables are allocated */ @@ -1101,7 +1105,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, error_free: list_del(&mapping->list); + spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); + spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); kfree(mapping); @@ -1151,7 +1157,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, } list_del(&mapping->list); + spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); + spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) @@ -1187,13 +1195,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); + spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); + spin_unlock(&vm->it_lock); trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); + spin_lock(&vm->it_lock); interval_tree_remove(&mapping->it, &vm->va); + spin_unlock(&vm->it_lock); kfree(mapping); } @@ -1248,7 +1260,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); - + spin_lock_init(&vm->it_lock); pd_size = amdgpu_vm_directory_size(adev); pd_entries = amdgpu_vm_num_pdes(adev); @@ -1312,7 +1324,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { unsigned id = vm->ids[i].id; From ef9f0a83d68cecca241ee5d61b797d598722772e Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 13 Nov 2015 13:43:22 +0800 Subject: [PATCH 168/291] drm/amdgpu: move bo_reserve out of amdgpu_vm_clear_bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Ifbb0c06680494bfa04d0be5e5941d31ae2e5ef28 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 ++++++++++++++------------ 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0513f3fed2c2..e6dc19bc2dd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -332,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job) * * @adev: amdgpu_device pointer * @bo: bo to clear + * + * need to reserve bo first before calling it. */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_bo *bo) @@ -343,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, uint64_t addr; int r; - r = amdgpu_bo_reserve(bo, false); - if (r) - return r; - r = reservation_object_reserve_shared(bo->tbo.resv); if (r) return r; r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (r) - goto error_unreserve; + goto error; addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); if (!ib) - goto error_unreserve; + goto error; r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); if (r) @@ -378,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (!r) amdgpu_bo_fence(bo, fence, true); fence_put(fence); - if (amdgpu_enable_scheduler) { - amdgpu_bo_unreserve(bo); + if (amdgpu_enable_scheduler) return 0; - } + error_free: amdgpu_ib_free(adev, ib); kfree(ib); -error_unreserve: - amdgpu_bo_unreserve(bo); +error: return r; } @@ -1087,11 +1083,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS, NULL, resv, &pt); - ww_mutex_unlock(&resv->lock); - if (r) + if (r) { + ww_mutex_unlock(&resv->lock); goto error_free; - + } r = amdgpu_vm_clear_bo(adev, pt); + ww_mutex_unlock(&resv->lock); if (r) { amdgpu_bo_unref(&pt); goto error_free; @@ -1280,8 +1277,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) NULL, NULL, &vm->page_directory); if (r) return r; - + r = amdgpu_bo_reserve(vm->page_directory, false); + if (r) { + amdgpu_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + } r = amdgpu_vm_clear_bo(adev, vm->page_directory); + amdgpu_bo_unreserve(vm->page_directory); if (r) { amdgpu_bo_unref(&vm->page_directory); vm->page_directory = NULL; From 49b02b180a541d6fb31031aaffe75496e9238942 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 13 Nov 2015 14:18:38 +0800 Subject: [PATCH 169/291] drm/amdgpu: reserve/unreserve objects out of map/unmap operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Id6514f2fb6e002437fdbe99353d5d35f4ac736c7 Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 21 +++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 ++++++------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 16dca46314bd..00c5b580f56c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -515,6 +515,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *rbo; struct amdgpu_bo_va *bo_va; + struct ttm_validate_buffer tv, tv_pd; + struct ww_acquire_ctx ticket; + struct list_head list, duplicates; uint32_t invalid_flags, va_flags = 0; int r = 0; @@ -552,7 +555,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; mutex_lock(&fpriv->vm.mutex); rbo = gem_to_amdgpu_bo(gobj); - r = amdgpu_bo_reserve(rbo, false); + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); + tv.bo = &rbo->tbo; + tv.shared = true; + list_add(&tv.head, &list); + + if (args->operation == AMDGPU_VA_OP_MAP) { + tv_pd.bo = &fpriv->vm.page_directory->tbo; + tv_pd.shared = true; + list_add(&tv_pd.head, &list); + } + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); if (r) { mutex_unlock(&fpriv->vm.mutex); drm_gem_object_unreference_unlocked(gobj); @@ -561,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo); if (!bo_va) { - amdgpu_bo_unreserve(rbo); + ttm_eu_backoff_reservation(&ticket, &list); + drm_gem_object_unreference_unlocked(gobj); mutex_unlock(&fpriv->vm.mutex); return -ENOENT; } @@ -584,7 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - + ttm_eu_backoff_reservation(&ticket, &list); if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) amdgpu_gem_va_update_vm(adev, bo_va, args->operation); mutex_unlock(&fpriv->vm.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e6dc19bc2dd8..159ce54bbd8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -985,7 +985,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, * Add a mapping of the BO at the specefied addr into the VM. * Returns 0 for success, error for failure. * - * Object has to be reserved and gets unreserved by this function! + * Object has to be reserved and unreserved outside! */ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, @@ -1001,23 +1001,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || - size == 0 || size & AMDGPU_GPU_PAGE_MASK) { - amdgpu_bo_unreserve(bo_va->bo); + size == 0 || size & AMDGPU_GPU_PAGE_MASK) return -EINVAL; - } /* make sure object fit at this offset */ eaddr = saddr + size; - if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) { - amdgpu_bo_unreserve(bo_va->bo); + if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) return -EINVAL; - } last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; if (last_pfn > adev->vm_manager.max_pfn) { dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n", last_pfn, adev->vm_manager.max_pfn); - amdgpu_bo_unreserve(bo_va->bo); return -EINVAL; } @@ -1034,14 +1029,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, tmp->it.start, tmp->it.last + 1); - amdgpu_bo_unreserve(bo_va->bo); r = -EINVAL; goto error; } mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); if (!mapping) { - amdgpu_bo_unreserve(bo_va->bo); r = -ENOMEM; goto error; } @@ -1067,8 +1060,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (eaddr > vm->max_pde_used) vm->max_pde_used = eaddr; - amdgpu_bo_unreserve(bo_va->bo); - /* walk over the address space and allocate the page tables */ for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { struct reservation_object *resv = vm->page_directory->tbo.resv; @@ -1077,18 +1068,15 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (vm->page_tables[pt_idx].bo) continue; - ww_mutex_lock(&resv->lock, NULL); r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS, NULL, resv, &pt); - if (r) { - ww_mutex_unlock(&resv->lock); + if (r) goto error_free; - } + r = amdgpu_vm_clear_bo(adev, pt); - ww_mutex_unlock(&resv->lock); if (r) { amdgpu_bo_unref(&pt); goto error_free; @@ -1122,7 +1110,7 @@ error: * Remove a mapping of the BO at the specefied addr from the VM. * Returns 0 for success, error for failure. * - * Object has to be reserved and gets unreserved by this function! + * Object has to be reserved and unreserved outside! */ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, @@ -1147,10 +1135,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, break; } - if (&mapping->list == &bo_va->invalids) { - amdgpu_bo_unreserve(bo_va->bo); + if (&mapping->list == &bo_va->invalids) return -ENOENT; - } } list_del(&mapping->list); @@ -1163,7 +1149,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, list_add(&mapping->list, &vm->freed); else kfree(mapping); - amdgpu_bo_unreserve(bo_va->bo); return 0; } From a5964396190d0c40dd549c23848c282fffa5d1f2 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Wed, 18 Nov 2015 10:48:20 +0200 Subject: [PATCH 170/291] xhci: Workaround to get Intel xHCI reset working more reliably Existing Intel xHCI controllers require a delay of 1 mS, after setting the CMD_RESET bit in command register, before accessing any HC registers. This allows the HC to complete the reset operation and be ready for HC register access. Without this delay, the subsequent HC register access, may result in a system hang, very rarely. Verified CherryView / Braswell platforms go through over 5000 warm reboot cycles (which was not possible without this patch), without any xHCI reset hang. Signed-off-by: Rajmohan Mani Tested-by: Joe Lawrence Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6e7dc6f93978..dfa44d3e8eee 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -175,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci) command |= CMD_RESET; writel(command, &xhci->op_regs->command); + /* Existing Intel xHCI controllers require a delay of 1 mS, + * after setting the CMD_RESET bit, and before accessing any + * HC registers. This allows the HC to complete the + * reset operation and be ready for HC register access. + * Without this delay, the subsequent HC register access, + * may result in a system hang very rarely. + */ + if (xhci->quirks & XHCI_INTEL_HOST) + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, 10 * 1000 * 1000); if (ret) From 42df7215facf27be8d53e657dd4a12d4ebad0a44 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 18 Nov 2015 10:48:21 +0200 Subject: [PATCH 171/291] usb: xhci: fix checking ep busy for CFC Function ep_ring_is_processing() checks the dequeue pointer in endpoint context to know whether an endpoint is busy with processing TRBs. This is not correct since dequeue pointer field in an endpoint context is only valid when the endpoint is in Halted or Stopped states. This buggy code causes audio noise when playing sound with USB headset connected to host controllers which support CFC (one of xhci 1.1 features). This patch should exist in stable kernel since v4.3. Reported-and-tested-by: YD Tseng Signed-off-by: Lu Baolu Cc: stable # v4.3 Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fa836251ca21..6c5e8133cf87 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3896,28 +3896,6 @@ cleanup: return ret; } -static int ep_ring_is_processing(struct xhci_hcd *xhci, - int slot_id, unsigned int ep_index) -{ - struct xhci_virt_device *xdev; - struct xhci_ring *ep_ring; - struct xhci_ep_ctx *ep_ctx; - struct xhci_virt_ep *xep; - dma_addr_t hw_deq; - - xdev = xhci->devs[slot_id]; - xep = &xhci->devs[slot_id]->eps[ep_index]; - ep_ring = xep->ring; - ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index); - - if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) != EP_STATE_RUNNING) - return 0; - - hw_deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK; - return (hw_deq != - xhci_trb_virt_to_dma(ep_ring->enq_seg, ep_ring->enqueue)); -} - /* * Check transfer ring to guarantee there is enough room for the urb. * Update ISO URB start_frame and interval. @@ -3983,10 +3961,12 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags, } /* Calculate the start frame and put it in urb->start_frame. */ - if (HCC_CFC(xhci->hcc_params) && - ep_ring_is_processing(xhci, slot_id, ep_index)) { - urb->start_frame = xep->next_frame_id; - goto skip_start_over; + if (HCC_CFC(xhci->hcc_params) && !list_empty(&ep_ring->td_list)) { + if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) == + EP_STATE_RUNNING) { + urb->start_frame = xep->next_frame_id; + goto skip_start_over; + } } start_frame = readl(&xhci->run_regs->microframe_index); From dad67d5f3d0efe01d38c6cebcb6698280e51927b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 18 Nov 2015 10:48:22 +0200 Subject: [PATCH 172/291] xhci: Fix a race in usb2 LPM resume, blocking U3 for usb2 devices Clear device initiated resume variables once device is fully up and running in U0 state. Resume needs to be signaled for 20ms for usb2 devices before they can be moved to U0 state. An interrupt is triggered if a device initiates resume. As we handle the event in interrupt context we can not sleep for 20ms, so we instead set a resume flag, a timestamp, and start the roothub polling. The roothub code will later move the port to U0 when it finds a port in resume state with the resume flag set, and timestamp passed by 20ms. A host initiated resume is however not done in interrupt context, and host initiated resume code will directly signal resume, wait 20ms and then move the port to U0. These two codepaths can race, if we are in the middle of a host initated resume, while sleeping for 20ms, we may handle a port event and find the port in resume state. The port event handling code will assume the resume was device initiated and set the resume flag and timestamp. Root hub code will however not catch the port in resume state again as the host initated resume code has already moved the port to U0. The resume flag and timestamp will remain set for this port preventing port from suspending again (LPM setting port to U3) Fix this for now by always clearing the device initated resume parameters once port is in U0 Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 5d2d7e954bd4..0230965fb78c 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -782,12 +782,15 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_SUSPEND; } } - if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 - && (raw_port_status & PORT_POWER) - && (bus_state->suspended_ports & (1 << wIndex))) { - bus_state->suspended_ports &= ~(1 << wIndex); - if (hcd->speed < HCD_USB3) - bus_state->port_c_suspend |= 1 << wIndex; + if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 && + (raw_port_status & PORT_POWER)) { + if (bus_state->suspended_ports & (1 << wIndex)) { + bus_state->suspended_ports &= ~(1 << wIndex); + if (hcd->speed < HCD_USB3) + bus_state->port_c_suspend |= 1 << wIndex; + } + bus_state->resume_done[wIndex] = 0; + clear_bit(wIndex, &bus_state->resuming_ports); } if (raw_port_status & PORT_CONNECT) { status |= USB_PORT_STAT_CONNECTION; From c139aa60c1007429335131167a0ca181e38c5668 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2015 10:13:08 +0000 Subject: [PATCH 173/291] arm64: barriers: fix smp_load_acquire to work with const arguments A newly introduced function in include/net/sock.h passes a const argument to smp_load_acquire: static inline int sk_state_load(const struct sock *sk) { return smp_load_acquire(&sk->sk_state); } This cause an allmodconfig build failure, since our underlying load-acquire implementation does not handle const types correctly: include/net/sock.h: In function 'sk_state_load': ./arch/arm64/include/asm/barrier.h:71:3: error: read-only variable '___p1' used as 'asm' output asm volatile ("ldarb %w0, %1" \ This patch fixes the problem by reusing the trick in READ_ONCE that loads via a non-const member of an anonymous union. This has the advantage of allowing us to use smp_load_acquire on packed structures (e.g. arch_spinlock_t) as well as primitive types. Cc: Arnd Bergmann Cc: David Daney Cc: Eric Dumazet Reported-by: Arnd Bergmann Reported-by: David Daney Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 624f9679f4b0..9622eb48f894 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -64,27 +64,31 @@ do { \ #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1; \ + union { typeof(*p) __val; char __c[1]; } __u; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("ldarb %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 2: \ asm volatile ("ldarh %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 4: \ asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 8: \ asm volatile ("ldar %0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ } \ - ___p1; \ + __u.__val; \ }) #define read_barrier_depends() do { } while(0) From 1ddaa021b000220b5f2ad023e4f15ed44990974b Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Tue, 17 Nov 2015 13:50:08 -0500 Subject: [PATCH 174/291] MAINTAINERS: brcmnand: Add co-maintainer for Broadcom SoCs Adding myself as co-maintainer of nand controller driver for the Broadcom SoCs. Signed-off-by: Kamal Dasu Acked-by: Florian Fainelli Signed-off-by: Brian Norris --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index da691aad7031..d20d6f3d6509 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2449,6 +2449,7 @@ F: drivers/firmware/broadcom/* BROADCOM STB NAND FLASH DRIVER M: Brian Norris +M: Kamal Dasu L: linux-mtd@lists.infradead.org L: bcm-kernel-feedback-list@broadcom.com S: Maintained From b57f9f34e27bf81c97b10d6725d71824e448c37e Mon Sep 17 00:00:00 2001 From: Glen Lee Date: Thu, 5 Nov 2015 18:51:23 +0900 Subject: [PATCH 175/291] Revert "Staging: wilc1000: coreconfigurator: Drop unneeded wrapper functions" The source and destination pointers are misplaced. This will be like, ether_addr_copy(data, bssid + ADDR2); -> ether_addr_copy(bssid, data + ADDR2); and also to use ether_addr_copy, it has to be proved that src/dst address are properly aligned(2). I revert this as author agree to drop this patch. This reverts commit d4622f68db8095dd54179e3134e97812727f6b89. Signed-off-by: Glen Lee Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/coreconfigurator.c | 48 +++++++++++---------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/staging/wilc1000/coreconfigurator.c b/drivers/staging/wilc1000/coreconfigurator.c index e10c6ffa698a..9568bdb6319b 100644 --- a/drivers/staging/wilc1000/coreconfigurator.c +++ b/drivers/staging/wilc1000/coreconfigurator.c @@ -13,12 +13,8 @@ #include "wilc_wlan.h" #include #include -#include #define TAG_PARAM_OFFSET (MAC_HDR_LEN + TIME_STAMP_LEN + \ BEACON_INTERVAL_LEN + CAP_INFO_LEN) -#define ADDR1 4 -#define ADDR2 10 -#define ADDR3 16 /* Basic Frame Type Codes (2-bit) */ enum basic_frame_type { @@ -175,32 +171,38 @@ static inline u8 get_from_ds(u8 *header) return ((header[1] & 0x02) >> 1); } +/* This function extracts the MAC Address in 'address1' field of the MAC */ +/* header and updates the MAC Address in the allocated 'addr' variable. */ +static inline void get_address1(u8 *pu8msa, u8 *addr) +{ + memcpy(addr, pu8msa + 4, 6); +} + +/* This function extracts the MAC Address in 'address2' field of the MAC */ +/* header and updates the MAC Address in the allocated 'addr' variable. */ +static inline void get_address2(u8 *pu8msa, u8 *addr) +{ + memcpy(addr, pu8msa + 10, 6); +} + +/* This function extracts the MAC Address in 'address3' field of the MAC */ +/* header and updates the MAC Address in the allocated 'addr' variable. */ +static inline void get_address3(u8 *pu8msa, u8 *addr) +{ + memcpy(addr, pu8msa + 16, 6); +} + /* This function extracts the BSSID from the incoming WLAN packet based on */ -/* the 'from ds' bit, and updates the MAC Address in the allocated 'data' */ +/* the 'from ds' bit, and updates the MAC Address in the allocated 'addr' */ /* variable. */ static inline void get_BSSID(u8 *data, u8 *bssid) { if (get_from_ds(data) == 1) - /* - * Extract the MAC Address in 'address2' field of the MAC - * header and update the MAC Address in the allocated 'data' - * variable. - */ - ether_addr_copy(data, bssid + ADDR2); + get_address2(data, bssid); else if (get_to_ds(data) == 1) - /* - * Extract the MAC Address in 'address1' field of the MAC - * header and update the MAC Address in the allocated 'data' - * variable. - */ - ether_addr_copy(data, bssid + ADDR1); + get_address1(data, bssid); else - /* - * Extract the MAC Address in 'address3' field of the MAC - * header and update the MAC Address in the allocated 'data' - * variable. - */ - ether_addr_copy(data, bssid + ADDR3); + get_address3(data, bssid); } /* This function extracts the SSID from a beacon/probe response frame */ From 799281a3c481a738801bf17e3079a6f91df56cd3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Nov 2015 23:29:56 +0100 Subject: [PATCH 176/291] Revert "cpufreq: intel_pstate: Avoid calculation for max/min" Revert commit 4ef451487019 (cpufreq: intel_pstate: Avoid calculation for max/min) as it depends on commit 37afb0003242 (cpufreq: intel_pstate: Use ACPI perf configuration) that causes problems to happen and needs to be reverted. Conflicts: drivers/cpufreq/intel_pstate.c Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 48 ++++------------------------------ 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2e31d097def6..d444000ce148 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -163,8 +163,6 @@ struct perf_limits { int max_sysfs_pct; int min_policy_pct; int min_sysfs_pct; - int max_perf_ctl; - int min_perf_ctl; }; static struct perf_limits performance_limits = { @@ -191,8 +189,6 @@ static struct perf_limits powersave_limits = { .max_sysfs_pct = 100, .min_policy_pct = 0, .min_sysfs_pct = 0, - .max_perf_ctl = 0, - .min_perf_ctl = 0, }; #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE @@ -938,23 +934,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - if (limits->max_perf_ctl && limits->max_sysfs_pct >= - limits->max_policy_pct) { - *max = limits->max_perf_ctl; - } else { - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), - limits->max_perf)); - *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, - cpu->pstate.turbo_pstate); - } + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + *max = clamp_t(int, max_perf_adj, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - if (limits->min_perf_ctl) { - *min = limits->min_perf_ctl; - } else { - min_perf = fp_toint(mul_fp(int_tofp(max_perf), - limits->min_perf)); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); - } + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) @@ -1229,12 +1214,6 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { -#if IS_ENABLED(CONFIG_ACPI) - struct cpudata *cpu; - int i; -#endif - pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__, - policy->cpuinfo.max_freq, policy->max); if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1270,23 +1249,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), int_tofp(100)); -#if IS_ENABLED(CONFIG_ACPI) - cpu = all_cpu_data[policy->cpu]; - for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { - int control; - - control = convert_to_native_pstate_format(cpu, i); - if (control * cpu->pstate.scaling == policy->max) - limits->max_perf_ctl = control; - if (control * cpu->pstate.scaling == policy->min) - limits->min_perf_ctl = control; - } - - pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n", - policy->cpuinfo.max_freq, policy->max, limits->min_perf_ctl, - limits->max_perf_ctl); -#endif - if (hwp_active) intel_pstate_hwp_set(); From 6ee11e413c495390dacabd96ad462eea9de9dfbd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Nov 2015 00:20:42 +0100 Subject: [PATCH 177/291] Revert "cpufreq: intel_pstate: Use ACPI perf configuration" Revert commit 37afb0003242 (cpufreq: intel_pstate: Use ACPI perf configuration) that is reported to cause a regression to happen on a system where invalid data are returned by the ACPI _PSS object. Since that commit makes assumptions regarding the _PSS output correctness that may turn out to be overly optimistic in general, there is a concern that it may introduce regression on more systems, so it's better to revert it now and we'll revisit the underlying issue in the next cycle with a more robust solution. Conflicts: drivers/cpufreq/intel_pstate.c Fixes: 37afb0003242 (cpufreq: intel_pstate: Use ACPI perf configuration) Reported-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.x86 | 1 - drivers/cpufreq/intel_pstate.c | 171 +-------------------------------- 2 files changed, 1 insertion(+), 171 deletions(-) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index adbd1de1cea5..c59bdcb83217 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -5,7 +5,6 @@ config X86_INTEL_PSTATE bool "Intel P state control" depends on X86 - select ACPI_PROCESSOR if ACPI help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d444000ce148..ef05944d8b16 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -34,10 +34,6 @@ #include #include -#if IS_ENABLED(CONFIG_ACPI) -#include -#endif - #define BYT_RATIOS 0x66a #define BYT_VIDS 0x66b #define BYT_TURBO_RATIOS 0x66c @@ -117,9 +113,6 @@ struct cpudata { u64 prev_mperf; u64 prev_tsc; struct sample sample; -#if IS_ENABLED(CONFIG_ACPI) - struct acpi_processor_performance acpi_perf_data; -#endif }; static struct cpudata **all_cpu_data; @@ -150,7 +143,6 @@ struct cpu_defaults { static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; -static int no_acpi_perf; struct perf_limits { int no_turbo; @@ -197,153 +189,6 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif -#if IS_ENABLED(CONFIG_ACPI) -/* - * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and - * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and - * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state - * ratio, out of it only high 8 bits are used. For example 0x1700 is setting - * target ratio 0x17. The _PSS control value stores in a format which can be - * directly written to PERF_CTL MSR. But in intel_pstate driver this shift - * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). - * This function converts the _PSS control value to intel pstate driver format - * for comparison and assignment. - */ -static int convert_to_native_pstate_format(struct cpudata *cpu, int index) -{ - return cpu->acpi_perf_data.states[index].control >> 8; -} - -static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) -{ - struct cpudata *cpu; - int ret; - bool turbo_absent = false; - int max_pstate_index; - int min_pss_ctl, max_pss_ctl, turbo_pss_ctl; - int i; - - cpu = all_cpu_data[policy->cpu]; - - pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n", - cpu->pstate.min_pstate, cpu->pstate.max_pstate, - cpu->pstate.turbo_pstate); - - if (!cpu->acpi_perf_data.shared_cpu_map && - zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map, - GFP_KERNEL, cpu_to_node(policy->cpu))) { - return -ENOMEM; - } - - ret = acpi_processor_register_performance(&cpu->acpi_perf_data, - policy->cpu); - if (ret) - return ret; - - /* - * Check if the control value in _PSS is for PERF_CTL MSR, which should - * guarantee that the states returned by it map to the states in our - * list directly. - */ - if (cpu->acpi_perf_data.control_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE) - return -EIO; - - pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu); - for (i = 0; i < cpu->acpi_perf_data.state_count; i++) - pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", - (i == cpu->acpi_perf_data.state ? '*' : ' '), i, - (u32) cpu->acpi_perf_data.states[i].core_frequency, - (u32) cpu->acpi_perf_data.states[i].power, - (u32) cpu->acpi_perf_data.states[i].control); - - /* - * If there is only one entry _PSS, simply ignore _PSS and continue as - * usual without taking _PSS into account - */ - if (cpu->acpi_perf_data.state_count < 2) - return 0; - - turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); - min_pss_ctl = convert_to_native_pstate_format(cpu, - cpu->acpi_perf_data.state_count - 1); - /* Check if there is a turbo freq in _PSS */ - if (turbo_pss_ctl <= cpu->pstate.max_pstate && - turbo_pss_ctl > cpu->pstate.min_pstate) { - pr_debug("intel_pstate: no turbo range exists in _PSS\n"); - limits->no_turbo = limits->turbo_disabled = 1; - cpu->pstate.turbo_pstate = cpu->pstate.max_pstate; - turbo_absent = true; - } - - /* Check if the max non turbo p state < Intel P state max */ - max_pstate_index = turbo_absent ? 0 : 1; - max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index); - if (max_pss_ctl < cpu->pstate.max_pstate && - max_pss_ctl > cpu->pstate.min_pstate) - cpu->pstate.max_pstate = max_pss_ctl; - - /* check If min perf > Intel P State min */ - if (min_pss_ctl > cpu->pstate.min_pstate && - min_pss_ctl < cpu->pstate.max_pstate) { - cpu->pstate.min_pstate = min_pss_ctl; - policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling; - } - - if (turbo_absent) - policy->cpuinfo.max_freq = cpu->pstate.max_pstate * - cpu->pstate.scaling; - else { - policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * - cpu->pstate.scaling; - /* - * The _PSS table doesn't contain whole turbo frequency range. - * This just contains +1 MHZ above the max non turbo frequency, - * with control value corresponding to max turbo ratio. But - * when cpufreq set policy is called, it will call with this - * max frequency, which will cause a reduced performance as - * this driver uses real max turbo frequency as the max - * frequeny. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo ratio. - * Also need to convert to MHz as _PSS freq is in MHz. - */ - cpu->acpi_perf_data.states[0].core_frequency = - turbo_pss_ctl * 100; - } - - pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n", - cpu->pstate.min_pstate, cpu->pstate.max_pstate, - cpu->pstate.turbo_pstate); - pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n", - policy->cpuinfo.max_freq, policy->cpuinfo.min_freq); - - return 0; -} - -static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) -{ - struct cpudata *cpu; - - if (!no_acpi_perf) - return 0; - - cpu = all_cpu_data[policy->cpu]; - acpi_processor_unregister_performance(policy->cpu); - return 0; -} - -#else -static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) -{ - return 0; -} - -static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) -{ - return 0; -} -#endif - static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = setpoint; @@ -1303,30 +1148,18 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; - if (!no_acpi_perf) - intel_pstate_init_perf_limits(policy); - /* - * If there is no acpi perf data or error, we ignore and use Intel P - * state calculated limits, So this is not fatal error. - */ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } -static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) -{ - return intel_pstate_exit_perf_limits(policy); -} - static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, - .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1368,6 +1201,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } #if IS_ENABLED(CONFIG_ACPI) +#include static bool intel_pstate_no_acpi_pss(void) { @@ -1563,9 +1397,6 @@ static int __init intel_pstate_setup(char *str) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; - if (!strcmp(str, "no_acpi")) - no_acpi_perf = 1; - return 0; } early_param("intel_pstate", intel_pstate_setup); From 938d21a2a6370241c86d515ca574aaaa9e8812f2 Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Mon, 9 Nov 2015 17:40:46 -0800 Subject: [PATCH 178/291] cpufreq: intel_pstate: Replace BYT with ATOM Rename symbol and function names starting with "BYT" or "byt" to start with "ATOM" or "atom", respectively, so as to make it clear that they may apply to Atom in general and not just to Baytrail (the goal is to support several Atoms architectures eventually). This should not lead to any functional changes. Signed-off-by: Philippe Longepe Signed-off-by: Stephane Gasparini Acked-by: Srinivas Pandruvada [ rjw : Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 58 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ef05944d8b16..5f124e98f439 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -34,10 +34,10 @@ #include #include -#define BYT_RATIOS 0x66a -#define BYT_VIDS 0x66b -#define BYT_TURBO_RATIOS 0x66c -#define BYT_TURBO_VIDS 0x66d +#define ATOM_RATIOS 0x66a +#define ATOM_VIDS 0x66b +#define ATOM_TURBO_RATIOS 0x66c +#define ATOM_TURBO_VIDS 0x66d #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) @@ -528,31 +528,31 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } -static int byt_get_min_pstate(void) +static int atom_get_min_pstate(void) { u64 value; - rdmsrl(BYT_RATIOS, value); + rdmsrl(ATOM_RATIOS, value); return (value >> 8) & 0x7F; } -static int byt_get_max_pstate(void) +static int atom_get_max_pstate(void) { u64 value; - rdmsrl(BYT_RATIOS, value); + rdmsrl(ATOM_RATIOS, value); return (value >> 16) & 0x7F; } -static int byt_get_turbo_pstate(void) +static int atom_get_turbo_pstate(void) { u64 value; - rdmsrl(BYT_TURBO_RATIOS, value); + rdmsrl(ATOM_TURBO_RATIOS, value); return value & 0x7F; } -static void byt_set_pstate(struct cpudata *cpudata, int pstate) +static void atom_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; int32_t vid_fp; @@ -577,10 +577,10 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } -#define BYT_BCLK_FREQS 5 -static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; +#define ATOM_BCLK_FREQS 5 +static int atom_freq_table[ATOM_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; -static int byt_get_scaling(void) +static int atom_get_scaling(void) { u64 value; int i; @@ -588,16 +588,16 @@ static int byt_get_scaling(void) rdmsrl(MSR_FSB_FREQ, value); i = value & 0x3; - BUG_ON(i > BYT_BCLK_FREQS); + WARN_ON(i > ATOM_BCLK_FREQS); - return byt_freq_table[i] * 100; + return atom_freq_table[i] * 100; } -static void byt_get_vid(struct cpudata *cpudata) +static void atom_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(BYT_VIDS, value); + rdmsrl(ATOM_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); cpudata->vid.ratio = div_fp( @@ -605,7 +605,7 @@ static void byt_get_vid(struct cpudata *cpudata) int_tofp(cpudata->pstate.max_pstate - cpudata->pstate.min_pstate)); - rdmsrl(BYT_TURBO_VIDS, value); + rdmsrl(ATOM_TURBO_VIDS, value); cpudata->vid.turbo = value & 0x7f; } @@ -726,7 +726,7 @@ static struct cpu_defaults core_params = { }, }; -static struct cpu_defaults byt_params = { +static struct cpu_defaults atom_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -736,13 +736,13 @@ static struct cpu_defaults byt_params = { .i_gain_pct = 4, }, .funcs = { - .get_max = byt_get_max_pstate, - .get_max_physical = byt_get_max_pstate, - .get_min = byt_get_min_pstate, - .get_turbo = byt_get_turbo_pstate, - .set = byt_set_pstate, - .get_scaling = byt_get_scaling, - .get_vid = byt_get_vid, + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = atom_get_scaling, + .get_vid = atom_get_vid, }, }; @@ -983,7 +983,7 @@ static void intel_pstate_timer_func(unsigned long __data) static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x2a, core_params), ICPU(0x2d, core_params), - ICPU(0x37, byt_params), + ICPU(0x37, atom_params), ICPU(0x3a, core_params), ICPU(0x3c, core_params), ICPU(0x3d, core_params), @@ -992,7 +992,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x45, core_params), ICPU(0x46, core_params), ICPU(0x47, core_params), - ICPU(0x4c, byt_params), + ICPU(0x4c, atom_params), ICPU(0x4e, core_params), ICPU(0x4f, core_params), ICPU(0x5e, core_params), From 1421df63c3cf956c69c26ac9660c6e223eeed980 Mon Sep 17 00:00:00 2001 From: Philippe Longepe Date: Mon, 9 Nov 2015 17:40:47 -0800 Subject: [PATCH 179/291] cpufreq: intel_pstate: Add separate support for Airmont cores There are two flavors of Atom cores to be supported by intel_pstate, Silvermont and Airmont, so make the driver distinguish between them by adding separate frequency tables. Separate the CPU defaults params for each of them and match the CPU IDs against them as appropriate. Signed-off-by: Philippe Longepe Signed-off-by: Stephane Gasparini Acked-by: Srinivas Pandruvada [ rjw: Subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 57 +++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5f124e98f439..001a532e342e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -577,20 +577,35 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate) wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); } -#define ATOM_BCLK_FREQS 5 -static int atom_freq_table[ATOM_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800}; - -static int atom_get_scaling(void) +static int silvermont_get_scaling(void) { u64 value; int i; + /* Defined in Table 35-6 from SDM (Sept 2015) */ + static int silvermont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000}; rdmsrl(MSR_FSB_FREQ, value); - i = value & 0x3; + i = value & 0x7; + WARN_ON(i > 4); - WARN_ON(i > ATOM_BCLK_FREQS); + return silvermont_freq_table[i]; +} - return atom_freq_table[i] * 100; +static int airmont_get_scaling(void) +{ + u64 value; + int i; + /* Defined in Table 35-10 from SDM (Sept 2015) */ + static int airmont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000, + 93300, 90000, 88900, 87500}; + + rdmsrl(MSR_FSB_FREQ, value); + i = value & 0xF; + WARN_ON(i > 8); + + return airmont_freq_table[i]; } static void atom_get_vid(struct cpudata *cpudata) @@ -726,7 +741,7 @@ static struct cpu_defaults core_params = { }, }; -static struct cpu_defaults atom_params = { +static struct cpu_defaults silvermont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -741,7 +756,27 @@ static struct cpu_defaults atom_params = { .get_min = atom_get_min_pstate, .get_turbo = atom_get_turbo_pstate, .set = atom_set_pstate, - .get_scaling = atom_get_scaling, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + }, +}; + +static struct cpu_defaults airmont_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = airmont_get_scaling, .get_vid = atom_get_vid, }, }; @@ -983,7 +1018,7 @@ static void intel_pstate_timer_func(unsigned long __data) static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x2a, core_params), ICPU(0x2d, core_params), - ICPU(0x37, atom_params), + ICPU(0x37, silvermont_params), ICPU(0x3a, core_params), ICPU(0x3c, core_params), ICPU(0x3d, core_params), @@ -992,7 +1027,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(0x45, core_params), ICPU(0x46, core_params), ICPU(0x47, core_params), - ICPU(0x4c, atom_params), + ICPU(0x4c, airmont_params), ICPU(0x4e, core_params), ICPU(0x4f, core_params), ICPU(0x5e, core_params), From 2d4ee3036774e394d416cded9d5cf7661ffb4e4f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 22:27:55 +0100 Subject: [PATCH 180/291] cpufreq: mediatek: fix build error The recently added mt8173 cpufreq driver relies on the cpu topology that is always present on ARM64 but optional on ARM32: drivers/cpufreq/mt8173-cpufreq.c: In function 'mtk_cpufreq_init': drivers/cpufreq/mt8173-cpufreq.c:441:30: error: 'cpu_topology' undeclared (first use in this function) cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling); This refines the Kconfig dependencies so that we can still build on ARM32, but only if COMPILE_TEST is selected and the CPU topology code is present. Signed-off-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index cd0391e46c6d..c97d9ea97b48 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -84,6 +84,7 @@ config ARM_KIRKWOOD_CPUFREQ config ARM_MT8173_CPUFREQ bool "Mediatek MT8173 CPUFreq support" depends on ARCH_MEDIATEK && REGULATOR + depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST) depends on !CPU_THERMAL || THERMAL=y select PM_OPP help From 3bf7f56e70c9f0edb4f91cbb73a393c3b1d24801 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Nov 2015 00:42:23 +0100 Subject: [PATCH 181/291] Revert "Documentation: kernel_parameters for Intel P state driver" Revert commit 053f56def57b (Documentation: kernel_parameters for Intel P state driver) as the code documented by it has been reverted already. Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9b75e2a760de..22a4b687ea5b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1546,9 +1546,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. hwp_only Only load intel_pstate on systems which support hardware P state control (HWP) if available. - no_acpi - Don't use ACPI processor performance control objects - _PSS and _PPC specified limits. intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) From 2ce47b44b25d8fb0114ff117813742adbefec8ff Mon Sep 17 00:00:00 2001 From: Bamvor Jian Zhang Date: Fri, 13 Nov 2015 11:17:51 +0800 Subject: [PATCH 182/291] selftests/seccomp: Get page size from sysconf The commit fd88d16c58c2 ("selftests/seccomp: Be more precise with syscall arguments.") use PAGE_SIZE directly which lead to build failure on arm64. Replace it with generic interface(sysconf(_SC_PAGESIZE)) to fix this failure. Build and test successful on x86_64 and arm64. Signed-off-by: Bamvor Jian Zhang Acked-by: Kees Cook Tested-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/seccomp/seccomp_bpf.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e38cc54942db..882fe83a3554 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -492,6 +492,9 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS) pid_t parent = getppid(); int fd; void *map1, *map2; + int page_size = sysconf(_SC_PAGESIZE); + + ASSERT_LT(0, page_size); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); @@ -504,16 +507,16 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS) EXPECT_EQ(parent, syscall(__NR_getppid)); map1 = (void *)syscall(sysno, - NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, PAGE_SIZE); + NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); EXPECT_NE(MAP_FAILED, map1); /* mmap2() should never return. */ map2 = (void *)syscall(sysno, - NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); + NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); EXPECT_EQ(MAP_FAILED, map2); /* The test failed, so clean up the resources. */ - munmap(map1, PAGE_SIZE); - munmap(map2, PAGE_SIZE); + munmap(map1, page_size); + munmap(map2, page_size); close(fd); } From 0df9d41ab5d43dc5b20abc8b22a6b6d098b03994 Mon Sep 17 00:00:00 2001 From: Yigal Korman Date: Mon, 16 Nov 2015 14:09:15 +0200 Subject: [PATCH 183/291] mm, dax: fix DAX deadlocks (COW fault) DAX handling of COW faults has wrong locking sequence: dax_fault does i_mmap_lock_read do_cow_fault does i_mmap_unlock_write Ross's commit[1] missed a fix[2] that Kirill added to Matthew's commit[3]. Original COW locking logic was introduced by Matthew here[4]. This should be applied to v4.3 as well. [1] 0f90cc6609c7 mm, dax: fix DAX deadlocks [2] 52a2b53ffde6 mm, dax: use i_mmap_unlock_write() in do_cow_fault() [3] 843172978bb9 dax: fix race between simultaneous faults [4] 2e4cdab0584f mm: allow page fault handlers to perform the COW Cc: Cc: Boaz Harrosh Cc: Alexander Viro Cc: Dave Chinner Cc: Jan Kara Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Acked-by: Ross Zwisler Signed-off-by: Yigal Korman Signed-off-by: Dan Williams --- mm/memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index deb679c31f2a..c387430f06c3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3015,9 +3015,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } goto uncharge_out; } @@ -3031,9 +3031,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else { /* * The fault handler has no page to lock, so it holds - * i_mmap_lock for write to protect against truncate. + * i_mmap_lock for read to protect against truncate. */ - i_mmap_unlock_write(vma->vm_file->f_mapping); + i_mmap_unlock_read(vma->vm_file->f_mapping); } return ret; uncharge_out: From f6619ef7508261be2ba3ded313ccc46ce670d0d3 Mon Sep 17 00:00:00 2001 From: "Wang, Rui Y" Date: Wed, 18 Nov 2015 23:00:53 +0800 Subject: [PATCH 184/291] drm/mgag200: fix kernel hang in cursor code. The machine hang completely with the following message on the console: [ 487.777538] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 [ 487.777554] IP: [] _raw_spin_lock+0xe/0x30 [ 487.777557] PGD 42e9f7067 PUD 42f2fa067 PMD 0 [ 487.777560] Oops: 0002 [#1] SMP ... [ 487.777618] CPU: 21 PID: 3190 Comm: Xorg Tainted: G E 4.4.0-rc1-3-default+ #6 [ 487.777620] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0059.R00.1501081238 01/08/2015 [ 487.777621] task: ffff880853ae4680 ti: ffff8808696d4000 task.ti: ffff8808696d4000 [ 487.777625] RIP: 0010:[] [] _raw_spin_lock+0xe/0x30 [ 487.777627] RSP: 0018:ffff8808696d79c0 EFLAGS: 00010246 [ 487.777628] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 487.777629] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000060 [ 487.777630] RBP: ffff8808696d79e0 R08: 0000000000000000 R09: ffff88086924a780 [ 487.777631] R10: 000000000001bb40 R11: 0000000000003246 R12: 0000000000000000 [ 487.777632] R13: ffff880463a27360 R14: ffff88046ca50218 R15: 0000000000000080 [ 487.777634] FS: 00007f3f81c5a8c0(0000) GS:ffff88086f060000(0000) knlGS:0000000000000000 [ 487.777635] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 487.777636] CR2: 0000000000000060 CR3: 000000042e678000 CR4: 00000000001406e0 [ 487.777638] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 487.777639] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 487.777639] Stack: [ 487.777642] ffffffffa00eb5fa ffff8808696d7b60 ffff88086b87d800 0000000000000000 [ 487.777644] ffff8808696d7ac8 ffffffffa01694b6 ffff8808696d7ae8 ffffffff8109c8d5 [ 487.777647] ffff880469158740 ffff880463a27000 ffff88086b87d800 ffff88086b87d800 [ 487.777647] Call Trace: [ 487.777674] [] ? drm_gem_object_lookup+0x1a/0xa0 [drm] [ 487.777681] [] mga_crtc_cursor_set+0xc6/0xb60 [mgag200] [ 487.777691] [] ? find_busiest_group+0x35/0x4a0 [ 487.777696] [] ? __might_sleep+0x44/0x80 [ 487.777699] [] ? __ww_mutex_lock+0x22/0x9c [ 487.777722] [] ? drm_modeset_lock+0x34/0xf0 [drm] [ 487.777733] [] restore_fbdev_mode+0xee/0x2a0 [drm_kms_helper] [ 487.777742] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x2e/0x70 [drm_kms_helper] [ 487.777748] [] drm_fb_helper_set_par+0x27/0x50 [drm_kms_helper] [ 487.777752] [] fb_set_var+0x18c/0x3f0 [ 487.777777] [] ? __ext4_handle_dirty_metadata+0x8a/0x210 [ext4] [ 487.777783] [] fbcon_blank+0x1b7/0x2b0 [ 487.777790] [] do_unblank_screen+0xb3/0x1c0 [ 487.777795] [] vt_ioctl+0x118a/0x1210 [ 487.777801] [] tty_ioctl+0x3f0/0xc90 [ 487.777808] [] ? kzfree+0x28/0x30 [ 487.777813] [] ? mntput+0x1f/0x30 [ 487.777817] [] do_vfs_ioctl+0x30d/0x570 [ 487.777822] [] ? task_work_run+0x8a/0xa0 [ 487.777825] [] SyS_ioctl+0x74/0x80 [ 487.777829] [] entry_SYSCALL_64_fastpath+0x12/0x71 [ 487.777851] Code: 65 ff 0d ce 02 a8 7e 5d c3 ba 01 00 00 00 f0 0f b1 17 85 c0 75 e8 b0 01 5d c3 0f 1f 00 65 ff 05 b1 02 a8 7e 31 c0 ba 01 00 00 00 0f b1 17 85 c0 75 01 c3 55 89 c6 48 89 e5 e8 4e f5 b1 ff 5d [ 487.777854] RIP [] _raw_spin_lock+0xe/0x30 [ 487.777855] RSP [ 487.777856] CR2: 0000000000000060 [ 487.777860] ---[ end trace 672a2cd555e0ebd3 ]--- The cursor code may be entered with file_priv == NULL && handle == NULL. The problem was introduced by: "bf89209 drm/mga200g: Hold a proper reference for cursor_set" which calls drm_gem_object_lookup(dev, file_priv...). Previously this wasn't a problem because we checked the handle. Move the check early in the function can fix the problem. Signed-off-by: Rui Wang Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_cursor.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_cursor.c b/drivers/gpu/drm/mgag200/mgag200_cursor.c index 4f2068fe5d88..a7bf6a90eae5 100644 --- a/drivers/gpu/drm/mgag200/mgag200_cursor.c +++ b/drivers/gpu/drm/mgag200/mgag200_cursor.c @@ -70,6 +70,11 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc, BUG_ON(pixels_2 != pixels_current && pixels_2 != pixels_prev); BUG_ON(pixels_current == pixels_prev); + if (!handle || !file_priv) { + mga_hide_cursor(mdev); + return 0; + } + obj = drm_gem_object_lookup(dev, file_priv, handle); if (!obj) return -ENOENT; @@ -88,12 +93,6 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc, goto out_unreserve1; } - if (!handle) { - mga_hide_cursor(mdev); - ret = 0; - goto out1; - } - /* Move cursor buffers into VRAM if they aren't already */ if (!pixels_1->pin_count) { ret = mgag200_bo_pin(pixels_1, TTM_PL_FLAG_VRAM, From 7383123647566a813692bb37a1389c767ed89158 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 19 Nov 2015 10:26:30 +0200 Subject: [PATCH 185/291] Revert "drm/i915: skip modeset if compatible for everyone." This reverts commit 6764e9f8724f1231b4deac53b9a82286ac0830e7 Author: Maarten Lankhorst Date: Thu Aug 27 15:44:06 2015 +0200 drm/i915: skip modeset if compatible for everyone. Bring back the i915.fastboot module parameter, disabled by default, due to backlight regression on Chromebook Pixel 2015. Apparently the firmware of the Chromebook in question enables the panel but disables backlight to avoid a brief garbage scanout upon loading the kernel/module. With fastboot, we leave the backlight untouched, in this case disabled. The user would have to do a modeset (i.e. not just crank up the brightness) to enable the backlight. There is no clean fix readily available, so get back to the drawing board by reverting. [N.B. The reference below is for when the thread was included on public lists, and some of the context had already been dropped by then.] Reported-and-tested-by: Olof Johansson Acked-by: Maarten Lankhorst Acked-by: Daniel Vetter References: http://marc.info/?i=CAKMK7uES7xk05ki92oeX6gmvZWAh9f2vL7yz=6T+fGK9J3X7cQ@mail.gmail.com Fixes: 6764e9f8724f ("drm/i915: skip modeset if compatible for everyone.") Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447921590-3785-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_params.c | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 84b5db257f1b..95bb27de774f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2651,6 +2651,7 @@ struct i915_params { int enable_cmd_parser; /* leave bools at the end to not create holes */ bool enable_hangcheck; + bool fastboot; bool prefault_disable; bool load_detect_test; bool reset; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 96bb23865eac..4be13a5eb932 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -40,6 +40,7 @@ struct i915_params i915 __read_mostly = { .preliminary_hw_support = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT), .disable_power_well = -1, .enable_ips = 1, + .fastboot = 0, .prefault_disable = 0, .load_detect_test = 0, .reset = true, @@ -133,6 +134,10 @@ MODULE_PARM_DESC(disable_power_well, module_param_named_unsafe(enable_ips, i915.enable_ips, int, 0600); MODULE_PARM_DESC(enable_ips, "Enable IPS (default: true)"); +module_param_named(fastboot, i915.fastboot, bool, 0600); +MODULE_PARM_DESC(fastboot, + "Try to skip unnecessary mode sets at boot time (default: false)"); + module_param_named_unsafe(prefault_disable, i915.prefault_disable, bool, 0600); MODULE_PARM_DESC(prefault_disable, "Disable page prefaulting for pread/pwrite/reloc (default:false). " diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 002e2a41ce1b..71860f8680f9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13068,7 +13068,8 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - if (intel_pipe_config_compare(state->dev, + if (i915.fastboot && + intel_pipe_config_compare(state->dev, to_intel_crtc_state(crtc->state), pipe_config, true)) { crtc_state->mode_changed = false; From 112677d683d31ebd6a8e8b02e0620ae512354b2d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Nov 2015 09:43:24 +0900 Subject: [PATCH 186/291] x86/ftrace: Add comment on static function tracing There was a confusion between update_ftrace_function() and static function tracing trampoline regarding 3rd parameter (ftrace_ops). Add a comment for clarification. Suggested-by: Steven Rostedt Signed-off-by: Namhyung Kim Cc: H. Peter Anvin Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1447721004-2551-1-git-send-email-namhyung@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mcount_64.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index 94ea120fa21f..87e1762e2bca 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -278,6 +278,12 @@ trace: /* save_mcount_regs fills in first two parameters */ save_mcount_regs + /* + * When DYNAMIC_FTRACE is not defined, ARCH_SUPPORTS_FTRACE_OPS is not + * set (see include/asm/ftrace.h and include/linux/ftrace.h). Only the + * ip and parent ip are used and the list function is called when + * function tracing is enabled. + */ call *ftrace_trace_function restore_mcount_regs From 581b7f158fe0383b492acd1ce3fb4e99d4e57808 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 3 Jun 2015 10:31:14 +0100 Subject: [PATCH 187/291] x86/cpu: Fix SMAP check in PVOPS environments There appears to be no formal statement of what pv_irq_ops.save_fl() is supposed to return precisely. Native returns the full flags, while lguest and Xen only return the Interrupt Flag, and both have comments by the implementations stating that only the Interrupt Flag is looked at. This may have been true when initially implemented, but no longer is. To make matters worse, the Xen PVOP leaves the upper bits undefined, making the BUG_ON() undefined behaviour. Experimentally, this now trips for 32bit PV guests on Broadwell hardware. The BUG_ON() is consistent for an individual build, but not consistent for all builds. It has also been a sitting timebomb since SMAP support was introduced. Use native_save_fl() instead, which will obtain an accurate view of the AC flag. Signed-off-by: Andrew Cooper Reviewed-by: David Vrabel Tested-by: Rusty Russell Cc: Rusty Russell Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Cc: Xen-devel CC: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1433323874-6927-1-git-send-email-andrew.cooper3@citrix.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4ddd780aeac9..c2b7522cbf35 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -273,10 +273,9 @@ __setup("nosmap", setup_disable_smap); static __always_inline void setup_smap(struct cpuinfo_x86 *c) { - unsigned long eflags; + unsigned long eflags = native_save_fl(); /* This should have been cleared long ago */ - raw_local_save_flags(eflags); BUG_ON(eflags & X86_EFLAGS_AC); if (cpu_has(c, X86_FEATURE_SMAP)) { From 5967c17b118a2bd1dd1d554cc4eee16233e52bec Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 6 Nov 2015 12:08:48 +0100 Subject: [PATCH 188/291] KVM: s390: enable SIMD only when no VCPUs were created We should never allow to enable/disable any facilities for the guest when other VCPUs were already created. kvm_arch_vcpu_(load|put) relies on SIMD not changing during runtime. If somebody would create and run VCPUs and then decides to enable SIMD, undefined behaviour could be possible (e.g. vector save area not being set up). Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Cc: stable@vger.kernel.org # 4.1+ --- arch/s390/kvm/kvm-s390.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8fe2f1c722dc..846589281b04 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -342,12 +342,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac->mask, 129); set_kvm_facility(kvm->arch.model.fac->list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? "(not available)" : "(success)"); break; From 03c02807e25ef0f44767f28e939efc2c5deb0f3d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Nov 2015 13:31:58 +0100 Subject: [PATCH 189/291] KVM: s390: fix pfmf intercept handler The pfmf intercept handler should check if the EDAT 1 facility is installed in the guest, not if it is installed in the host. Signed-off-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 77191b85ea7a..d76b51cb4b62 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -660,7 +660,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - if (!MACHINE_HAS_PFMF) + if (!test_kvm_facility(vcpu->kvm, 8)) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) From db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: [PATCH 190/291] KVM: Provide function for VCPU lookup by id Let's provide a function to lookup a VCPU by id. Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split patch from refactoring patch] --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5706a2108f0a..c923350ca20a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ From b85de33a1a3433487b6a721cfdce25ec8673e622 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:38:15 +0100 Subject: [PATCH 191/291] KVM: s390: avoid memory overwrites on emergency signal injection Commit 383d0b050106 ("KVM: s390: handle pending local interrupts via bitmap") introduced a possible memory overwrite from user space. User space could pass an invalid emergency signal code (sending VCPU) and therefore exceed the bitmap. Let's take care of this case and check that the id is in the valid range. Reviewed-by: Dominik Dingel Cc: stable@vger.kernel.org # v3.19+ db27a7a KVM: Provide function for VCPU lookup by id Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 373e32346d68..7242c2da4009 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1110,6 +1110,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_or(CPUSTAT_EXT_INT, li->cpuflags); From 152e9f65d66f0a3891efc3869440becc0e7ff53f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:06:06 +0100 Subject: [PATCH 192/291] KVM: s390: fix wrong lookup of VCPUs by array index For now, VCPUs were always created sequentially with incrementing VCPU ids. Therefore, the index in the VCPUs array matched the id. As sequential creation might change with cpu hotplug, let's use the correct lookup function to find a VCPU by id, not array index. Let's also use kvm_lookup_vcpu() for validation of the sending VCPU on external call injection. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Cc: stable@vger.kernel.org # db27a7a KVM: Provide function for VCPU lookup by id --- arch/s390/kvm/interrupt.c | 3 +-- arch/s390/kvm/sigp.c | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7242c2da4009..6a75352f453c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1030,8 +1030,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp.has_sigpif) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index da690b69f9fe..77c22d685c7a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); From 5481c8fb1da2a573861095fbea078f30c664d0bd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Nov 2015 18:46:48 +0100 Subject: [PATCH 193/291] drm/atomic-helper: Check encoder/crtc constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was totally lost when I originally created the atomic helpers. We probably should also check possible_clones in the helpers, but since the legacy ones didn't do that this is for a separate patch. Reported-by: Ville Syrjälä Cc: Ville Syrjälä Cc: Daniel Stone Reviewed-by: Daniel Stone Reviewed-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1447868808-10266-1-git-send-email-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_atomic_helper.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index cd398a245d2e..e5aec45bf985 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -210,6 +210,14 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx) return -EINVAL; } + if (!drm_encoder_crtc_ok(new_encoder, connector_state->crtc)) { + DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] incompatible with [CRTC:%d]\n", + new_encoder->base.id, + new_encoder->name, + connector_state->crtc->base.id); + return -EINVAL; + } + if (new_encoder == connector_state->best_encoder) { DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d]\n", connector->base.id, From 92e788b749862ebe9920360513a718e5dd4da7a9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 18 Nov 2015 10:48:55 -0800 Subject: [PATCH 194/291] arm64: restore bogomips information in /proc/cpuinfo As previously reported, some userspace applications depend on bogomips showed by /proc/cpuinfo. Although there is much less legacy impact on aarch64 than arm, it does break libvirt. This patch reverts commit 326b16db9f69 ("arm64: delay: don't bother reporting bogomips in /proc/cpuinfo"), but with some tweak due to context change and without the pr_info(). Fixes: 326b16db9f69 ("arm64: delay: don't bother reporting bogomips in /proc/cpuinfo") Signed-off-by: Yang Shi Acked-by: Will Deacon Cc: # 3.12+ Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuinfo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 706679d0a0b4..212ae6361d8b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * In case the boot CPU is hotpluggable, we record its initial state and @@ -112,6 +113,10 @@ static int c_show(struct seq_file *m, void *v) */ seq_printf(m, "processor\t: %d\n", i); + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000UL/HZ), + loops_per_jiffy / (5000UL/HZ) % 100); + /* * Dump out the common processor features in a single line. * Userspace should read the hwcaps with getauxval(AT_HWCAP) From bcbd94ff481ec1d7b5c824d90df82d0faafabd35 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 19 Nov 2015 07:36:50 -0500 Subject: [PATCH 195/291] dm crypt: fix a possible hang due to race condition on exit A kernel thread executes __set_current_state(TASK_INTERRUPTIBLE), __add_wait_queue, spin_unlock_irq and then tests kthread_should_stop(). It is possible that the processor reorders memory accesses so that kthread_should_stop() is executed before __set_current_state(). If such reordering happens, there is a possible race on thread termination: CPU 0: calls kthread_should_stop() it tests KTHREAD_SHOULD_STOP bit, returns false CPU 1: calls kthread_stop(cc->write_thread) sets the KTHREAD_SHOULD_STOP bit calls wake_up_process on the kernel thread, that sets the thread state to TASK_RUNNING CPU 0: sets __set_current_state(TASK_INTERRUPTIBLE) spin_unlock_irq(&cc->write_thread_wait.lock) schedule() - and the process is stuck and never terminates, because the state is TASK_INTERRUPTIBLE and wake_up_process on CPU 1 already terminated Fix this race condition by using a new flag DM_CRYPT_EXIT_THREAD to signal that the kernel thread should exit. The flag is set and tested while holding cc->write_thread_wait.lock, so there is no possibility of racy access to the flag. Also, remove the unnecessary set_task_state(current, TASK_RUNNING) following the schedule() call. When the process was woken up, its state was already set to TASK_RUNNING. Other kernel code also doesn't set the state to TASK_RUNNING following schedule() (for example, do_wait_for_common in completion.c doesn't do it). Fixes: dc2676210c42 ("dm crypt: offload writes to thread") Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 917d47e290ae..3147c8d09ea8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -112,7 +112,8 @@ struct iv_tcw_private { * and encrypts / decrypts at the same time. */ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, - DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, + DM_CRYPT_EXIT_THREAD}; /* * The fields in here must be read only after initialization. @@ -1203,20 +1204,18 @@ continue_locked: if (!RB_EMPTY_ROOT(&cc->write_tree)) goto pop_from_list; + if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) { + spin_unlock_irq(&cc->write_thread_wait.lock); + break; + } + __set_current_state(TASK_INTERRUPTIBLE); __add_wait_queue(&cc->write_thread_wait, &wait); spin_unlock_irq(&cc->write_thread_wait.lock); - if (unlikely(kthread_should_stop())) { - set_task_state(current, TASK_RUNNING); - remove_wait_queue(&cc->write_thread_wait, &wait); - break; - } - schedule(); - set_task_state(current, TASK_RUNNING); spin_lock_irq(&cc->write_thread_wait.lock); __remove_wait_queue(&cc->write_thread_wait, &wait); goto continue_locked; @@ -1531,8 +1530,13 @@ static void crypt_dtr(struct dm_target *ti) if (!cc) return; - if (cc->write_thread) + if (cc->write_thread) { + spin_lock_irq(&cc->write_thread_wait.lock); + set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags); + wake_up_locked(&cc->write_thread_wait); + spin_unlock_irq(&cc->write_thread_wait.lock); kthread_stop(cc->write_thread); + } if (cc->io_queue) destroy_workqueue(cc->io_queue); From 6824c5ef5e8900e61ce8ed40885cacc1c9301c14 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 18 Nov 2015 16:33:08 -0700 Subject: [PATCH 196/291] NVMe: Fix possible arithmetic overflow for max segments Reported-by: Paul Grabinar Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8187df204695..394fd1631cd0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2268,7 +2268,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (dev->max_hw_sectors) { blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); blk_queue_max_segments(ns->queue, - ((dev->max_hw_sectors << 9) / dev->page_size) + 1); + (dev->max_hw_sectors / (dev->page_size >> 9)) + 1); } if (dev->stripe_size) blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9); From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2015 13:29:28 -0800 Subject: [PATCH 197/291] block: protect rw_page against device teardown Fix use after free crashes like the following: general protection fault: 0000 [#1] SMP Call Trace: [] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem] [] pmem_rw_page+0x42/0x80 [nd_pmem] [] bdev_read_page+0x50/0x60 [] do_mpage_readpage+0x510/0x770 [] ? I_BDEV+0x20/0x20 [] ? lru_cache_add+0x1c/0x50 [] mpage_readpages+0x107/0x170 [] ? I_BDEV+0x20/0x20 [] ? I_BDEV+0x20/0x20 [] blkdev_readpages+0x1d/0x20 [] __do_page_cache_readahead+0x28f/0x310 [] ? __do_page_cache_readahead+0x169/0x310 [] ? pagecache_get_page+0x2d/0x1d0 [] filemap_fault+0x396/0x530 [] __do_fault+0x4e/0xf0 [] handle_mm_fault+0x11bd/0x1b50 Cc: Cc: Jens Axboe Cc: Alexander Viro Reported-by: kbuild test robot Acked-by: Matthew Wilcox [willy: symmetry fixups] Signed-off-by: Dan Williams --- block/blk.h | 2 -- fs/block_dev.c | 18 ++++++++++++++++-- include/linux/blkdev.h | 2 ++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/block/blk.h b/block/blk.h index da722eb786df..c43926d3d74d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -72,8 +72,6 @@ void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); -int blk_queue_enter(struct request_queue *q, gfp_t gfp); -void blk_queue_exit(struct request_queue *q); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) diff --git a/fs/block_dev.c b/fs/block_dev.c index bb0dfb1c7af1..c25639e907bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); From 6bb9535bc3f59194a0ae17b17ca71aecd0f7e3a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Thu, 19 Nov 2015 12:50:08 +0100 Subject: [PATCH 198/291] null_blk: use ppa_cache pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a page pool, we can save memory by only allocating room for 64 entries for the ppa command. Introduce a ppa_cache to allocate only the required memory for the ppa list. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 816525143a74..3e9c4f94b5be 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -47,6 +47,7 @@ static LIST_HEAD(nullb_list); static struct mutex lock; static int null_major; static int nullb_indexes; +static struct kmem_cache *ppa_cache; struct completion_queue { struct llist_head list; @@ -521,7 +522,7 @@ static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name) { mempool_t *virtmem_pool; - virtmem_pool = mempool_create_page_pool(64, 0); + virtmem_pool = mempool_create_slab_pool(64, ppa_cache); if (!virtmem_pool) { pr_err("null_blk: Unable to create virtual memory pool\n"); return NULL; @@ -767,6 +768,12 @@ static int __init null_init(void) bs = PAGE_SIZE; } + if (use_lightnvm && bs != 4096) { + pr_warn("null_blk: LightNVM only supports 4k block size\n"); + pr_warn("null_blk: defaults block size to 4k\n"); + bs = 4096; + } + if (use_lightnvm && queue_mode != NULL_Q_MQ) { pr_warn("null_blk: LightNVM only supported for blk-mq\n"); pr_warn("null_blk: defaults queue mode to blk-mq\n"); @@ -803,15 +810,27 @@ static int __init null_init(void) if (null_major < 0) return null_major; + if (use_lightnvm) { + ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64), + 0, 0, NULL); + if (!ppa_cache) { + pr_err("null_blk: unable to create ppa cache\n"); + return -ENOMEM; + } + } + for (i = 0; i < nr_devices; i++) { if (null_add_dev()) { unregister_blkdev(null_major, "nullb"); - return -EINVAL; + goto err_ppa; } } pr_info("null: module loaded\n"); return 0; +err_ppa: + kmem_cache_destroy(ppa_cache); + return -EINVAL; } static void __exit null_exit(void) @@ -826,6 +845,8 @@ static void __exit null_exit(void) null_del_dev(nullb); } mutex_unlock(&lock); + + kmem_cache_destroy(ppa_cache); } module_init(null_init); From 5b40db99099ddebe31e9b1b759894cf09c0c6679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Thu, 19 Nov 2015 12:50:09 +0100 Subject: [PATCH 199/291] null_blk: use device addressing mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The linear addressing mode was removed in 7386af2. Make null_blk instead expose the ppa format geometry and support the generic addressing mode. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 3e9c4f94b5be..d51c24ac529f 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -486,6 +486,7 @@ static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd) static int null_lnvm_id(struct request_queue *q, struct nvm_id *id) { sector_t size = gb * 1024 * 1024 * 1024ULL; + sector_t blksize; struct nvm_id_group *grp; id->ver_id = 0x1; @@ -493,17 +494,34 @@ static int null_lnvm_id(struct request_queue *q, struct nvm_id *id) id->cgrps = 1; id->cap = 0x3; id->dom = 0x1; - id->ppat = NVM_ADDRMODE_LINEAR; + + id->ppaf.blk_offset = 0; + id->ppaf.blk_len = 16; + id->ppaf.pg_offset = 16; + id->ppaf.pg_len = 16; + id->ppaf.sect_offset = 32; + id->ppaf.sect_len = 8; + id->ppaf.pln_offset = 40; + id->ppaf.pln_len = 8; + id->ppaf.lun_offset = 48; + id->ppaf.lun_len = 8; + id->ppaf.ch_offset = 56; + id->ppaf.ch_len = 8; do_div(size, bs); /* convert size to pages */ + do_div(size, 256); /* concert size to pgs pr blk */ grp = &id->groups[0]; grp->mtype = 0; - grp->fmtype = 1; + grp->fmtype = 0; grp->num_ch = 1; - grp->num_lun = 1; - grp->num_pln = 1; - grp->num_blk = size / 256; grp->num_pg = 256; + blksize = size; + do_div(size, (1 << 16)); + grp->num_lun = size + 1; + do_div(blksize, grp->num_lun); + grp->num_blk = blksize; + grp->num_pln = 1; + grp->fpg_sz = bs; grp->csecs = bs; grp->trdt = 25000; From 54514aa465e94316a4bf1c5dfe970536bec3e76f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Thu, 19 Nov 2015 12:50:10 +0100 Subject: [PATCH 200/291] null_blk: do not del gendisk with lightnvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gendisk structure has not been initialized when using lightnvm. Make sure to not delete it upon exit. Also make sure that we use the appropriate disk_name at unregistration. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d51c24ac529f..5c8ba5484d86 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -435,12 +435,14 @@ static void null_del_dev(struct nullb *nullb) list_del_init(&nullb->list); if (use_lightnvm) - nvm_unregister(nullb->disk->disk_name); - del_gendisk(nullb->disk); + nvm_unregister(nullb->disk_name); + else + del_gendisk(nullb->disk); blk_cleanup_queue(nullb->q); if (queue_mode == NULL_Q_MQ) blk_mq_free_tag_set(&nullb->tag_set); - put_disk(nullb->disk); + if (!use_lightnvm) + put_disk(nullb->disk); cleanup_queues(nullb); kfree(nullb); } From 768acd64d68b232e0d2b9623d9846457355f0c27 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 18 Nov 2015 16:49:52 -0800 Subject: [PATCH 201/291] PCI: Fix OF logic in pci_dma_configure() This patch fixes a bug introduced by previous commit, which incorrectly checkes the of_node of the end-point device. Instead, it should check the of_node of the host bridge. Fixes: 50230713b639 ("PCI: OF: Move of_pci_dma_configure() to pci_dma_configure()") Reported-by: Robin Murphy Signed-off-by: Suravee Suthikulpanit Acked-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/pci/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 064078e11017..dc8fbe5c09d3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1646,8 +1646,8 @@ static void pci_dma_configure(struct pci_dev *dev) { struct device *bridge = pci_get_host_bridge_device(dev); - if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { - if (bridge->parent) + if (IS_ENABLED(CONFIG_OF) && + bridge->parent && bridge->parent->of_node) { of_dma_configure(&dev->dev, bridge->parent->of_node); } else if (has_acpi_companion(bridge)) { struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); From 638148e20c7f8f6e95017fdc13bce8549a6925e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:12:33 +0100 Subject: [PATCH 202/291] USB: option: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Cc: Signed-off-by: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2ab2a33e00c4..f2280606b73c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -355,6 +355,7 @@ static void option_instat_callback(struct urb *urb); /* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick * * It seems to contain a Qualcomm QSC6240/6290 chipset */ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 +#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -520,6 +521,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = { .sendsetup = BIT(0) | BIT(1), }; +static const struct option_blacklist_info four_g_w100_blacklist = { + .sendsetup = BIT(1) | BIT(2), + .reserved = BIT(3), +}; + static const struct option_blacklist_info alcatel_x200_blacklist = { .sendsetup = BIT(0) | BIT(1), .reserved = BIT(4), @@ -1643,6 +1649,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = (kernel_ulong_t)&four_g_w14_blacklist }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), + .driver_info = (kernel_ulong_t)&four_g_w100_blacklist + }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, From c4f161308ac280ce5b27681ddcc2bbac6de39c20 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 19 Nov 2015 11:47:47 +0800 Subject: [PATCH 203/291] usb: kconfig: fix warning of select USB_OTG When choose randconfig for kernel build, it reports below warning: "warning: (USB_OTG_FSM && FSL_USB2_OTG && USB_MV_OTG) selects USB_OTG which has unmet direct dependencies (USB_SUPPORT && USB && PM)" In fact, USB_OTG is visible symbol and depends on PM, so the driver needs to depend on it to reduce dependency problem. Signed-off-by: Peter Chen Reported-by: Arnd Bergmann Cc: Felipe Balbi Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/Kconfig | 3 +-- drivers/usb/phy/Kconfig | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index a99c89e78126..dd280108758f 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -77,8 +77,7 @@ config USB_OTG_BLACKLIST_HUB config USB_OTG_FSM tristate "USB 2.0 OTG FSM implementation" - depends on USB - select USB_OTG + depends on USB && USB_OTG select USB_PHY help Implements OTG Finite State Machine as specified in On-The-Go diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 173132416170..22e8ecb6bfbd 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,6 @@ config AB8500_USB config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM - select USB_OTG select USB_PHY help Enable this to support Freescale USB OTG transceiver. @@ -168,8 +167,7 @@ config USB_QCOM_8X16_PHY config USB_MV_OTG tristate "Marvell USB OTG support" - depends on USB_EHCI_MV && USB_MV_UDC && PM - select USB_OTG + depends on USB_EHCI_MV && USB_MV_UDC && PM && USB_OTG select USB_PHY help Say Y here if you want to build Marvell USB OTG transciever From 44243ef42d77ae367e6e9c68afca3b4578d2d461 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Sun, 15 Nov 2015 17:12:08 +0000 Subject: [PATCH 204/291] USB: MAINTAINERS: cxacru Make cxacru an orphan. I still have a few of these devices for testing but haven't had an ADSL1 connection for several years. Signed-off-by: Simon Arlott Cc: Duncan Sands Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e9caa4b28828..d1ef313905bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2929,10 +2929,9 @@ S: Maintained F: drivers/platform/x86/compal-laptop.c CONEXANT ACCESSRUNNER USB DRIVER -M: Simon Arlott L: accessrunner-general@lists.sourceforge.net W: http://accessrunner.sourceforge.net/ -S: Maintained +S: Orphan F: drivers/usb/atm/cxacru.c CONFIGFS From 19cd80a214821f4b558560ebd76bfb2c38b4f3d8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Nov 2015 10:27:00 +0100 Subject: [PATCH 205/291] usblp: do not set TASK_INTERRUPTIBLE before lock It is not permitted to set task state before lock. usblp_wwait sets the state to TASK_INTERRUPTIBLE and calls mutex_lock_interruptible. Upon return from that function, the state will be TASK_RUNNING again. This is clearly a bug and a warning is generated with LOCKDEP too: WARNING: CPU: 1 PID: 5109 at kernel/sched/core.c:7404 __might_sleep+0x7d/0x90() do not call blocking ops when !TASK_RUNNING; state=1 set at [] usblp_wwait+0xa0/0x310 [usblp] Modules linked in: ... CPU: 1 PID: 5109 Comm: captmon Tainted: G W 4.2.5-0.gef2823b-default #1 Hardware name: LENOVO 23252SG/23252SG, BIOS G2ET33WW (1.13 ) 07/24/2012 ffffffff81a4edce ffff880236ec7ba8 ffffffff81716651 0000000000000000 ffff880236ec7bf8 ffff880236ec7be8 ffffffff8106e146 0000000000000282 ffffffff81a50119 000000000000028b 0000000000000000 ffff8802dab7c508 Call Trace: ... [] warn_slowpath_fmt+0x46/0x50 [] __might_sleep+0x7d/0x90 [] mutex_lock_interruptible_nested+0x2f/0x4b0 [] usblp_wwait+0xcc/0x310 [usblp] [] usblp_write+0x72/0x350 [usblp] [] __vfs_write+0x28/0xf0 ... Commit 7f477358e2384c54b190cc3b6ce28277050a041b (usblp: Implement the ENOSPC convention) moved the set prior locking. So move it back after the lock. Signed-off-by: Jiri Slaby Fixes: 7f477358e2 ("usblp: Implement the ENOSPC convention") Acked-By: Pete Zaitcev Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 433bbc34a8a4..071964c7847f 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -884,11 +884,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock) add_wait_queue(&usblp->wwait, &waita); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } + set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) From 2b3f34451054768a3da27ea1e86da676bf4621bd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 10:09:18 +0100 Subject: [PATCH 206/291] parisc: Fix wrong comment regarding first pmd entry flags The first pmd entry is marked with PxD_FLAG_ATTACHED instead of _PAGE_GATEWAY. Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 3edbb9fc91b4..f2fd327dce2e 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -35,7 +35,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) PxD_FLAG_VALID | PxD_FLAG_ATTACHED) + (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT)); - /* The first pmd entry also is marked with _PAGE_GATEWAY as + /* The first pmd entry also is marked with PxD_FLAG_ATTACHED as * a signal that this pmd may not be freed */ __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); #endif From 1e208ae7326caa33ef7d4496d7ff1c404657a4a3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 10:12:42 +0100 Subject: [PATCH 207/291] parisc: Drop definition of start_thread_som for HP-UX SOM binaries The definition of start_thread_som was planned to be used to execute HP-UX SOM binaries. Since HP-UX compatibility was dropped with kernel 4.0 there is no need to carry it further. Signed-off-by: Helge Deller --- arch/parisc/include/asm/processor.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 54adb60c0a42..7e759ecb1343 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -192,33 +192,6 @@ void show_trace(struct task_struct *task, unsigned long *stack); */ typedef unsigned int elf_caddr_t; -#define start_thread_som(regs, new_pc, new_sp) do { \ - unsigned long *sp = (unsigned long *)new_sp; \ - __u32 spaceid = (__u32)current->mm->context; \ - unsigned long pc = (unsigned long)new_pc; \ - /* offset pc for priv. level */ \ - pc |= 3; \ - \ - regs->iasq[0] = spaceid; \ - regs->iasq[1] = spaceid; \ - regs->iaoq[0] = pc; \ - regs->iaoq[1] = pc + 4; \ - regs->sr[2] = LINUX_GATEWAY_SPACE; \ - regs->sr[3] = 0xffff; \ - regs->sr[4] = spaceid; \ - regs->sr[5] = spaceid; \ - regs->sr[6] = spaceid; \ - regs->sr[7] = spaceid; \ - regs->gr[ 0] = USER_PSW; \ - regs->gr[30] = ((new_sp)+63)&~63; \ - regs->gr[31] = pc; \ - \ - get_user(regs->gr[26],&sp[0]); \ - get_user(regs->gr[25],&sp[-1]); \ - get_user(regs->gr[24],&sp[-2]); \ - get_user(regs->gr[23],&sp[-3]); \ -} while(0) - /* The ELF abi wants things done a "wee bit" differently than * som does. Supporting this behavior here avoids * having our own version of create_elf_tables. From 5011a7e808c9fec643d752c5a495a48f27268a48 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 17 Nov 2015 09:40:07 +0100 Subject: [PATCH 208/291] MIPS: ath79: Fix the DDR control initialization on ar71xx and ar934x The DDR control initialization needs to know the SoC type, however ath79_detect_sys_type() was called after ath79_ddr_ctrl_init(). Reverse the order to fix the DDR control initialization on ar71xx and ar934x. Signed-off-by: Alban Bedel Cc: Felix Fietkau Cc: Qais Yousef Cc: Andrew Bresticker CC: stable@vger.kernel.org # v4.2+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11500/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 1ba21204ebe0..9a0013703579 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -216,9 +216,9 @@ void __init plat_mem_setup(void) AR71XX_RESET_SIZE); ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE, AR71XX_PLL_SIZE); + ath79_detect_sys_type(); ath79_ddr_ctrl_init(); - ath79_detect_sys_type(); if (mips_machtype != ATH79_MACH_GENERIC_OF) detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX); From accbfb52d09ac00147f02c4335d1cc47665e6653 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 17 Nov 2015 20:34:51 +0100 Subject: [PATCH 209/291] MIPS: ath79: Fix the size of the MISC INTC registers in ar9132.dtsi There is 2 registers that is 8 bytes long, not 4. Signed-off-by: Alban Bedel Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Alexander Couzens Cc: Joel Porquet Cc: Andrew Bresticker Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11508/ Signed-off-by: Ralf Baechle --- arch/mips/boot/dts/qca/ar9132.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi index fb7734eadbf0..13d0439496a9 100644 --- a/arch/mips/boot/dts/qca/ar9132.dtsi +++ b/arch/mips/boot/dts/qca/ar9132.dtsi @@ -107,7 +107,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar9132-misc-intc", "qca,ar7100-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; From 55f1d5988c52d481dc489e29ee5e8905b18ff859 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 17 Nov 2015 21:52:00 +0100 Subject: [PATCH 210/291] MIPS: ath79: Add a machine entry for booting OF machines As I'm using a board with a broken old bootloader I hardcoded the mips_machtype and did't notice that the machine entry was still missing. [ralf@linux-mips.org: Fixed spelling message noticed by Sergei Shtylyov .] Signed-off-by: Alban Bedel Cc: Qais Yousef Cc: Felix Fietkau Cc: Andrew Bresticker Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11503/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/setup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 9a0013703579..8755d618e116 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -281,3 +281,8 @@ MIPS_MACHINE(ATH79_MACH_GENERIC, "Generic", "Generic AR71XX/AR724X/AR913X based board", ath79_generic_init); + +MIPS_MACHINE(ATH79_MACH_GENERIC_OF, + "DTB", + "Generic AR71XX/AR724X/AR913X based board (DT)", + NULL); From 8bb6fd585d419d906fb3c228db47fbc4e4272a00 Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Tue, 17 Nov 2015 17:53:18 +0800 Subject: [PATCH 211/291] i2c: imx: fix a compiling error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/i2c/busses/i2c-imx.c:978:2: error: implicit declaration of function ‘pinctrl_select_state’ [-Werror=implicit-function-declaration] pinctrl_select_state(i2c_imx->pinctrl, i2c_imx->pinctrl_pins_gpio); ^ Signed-off-by: Hou Zhiqiang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 1e4d99da4164..9bb0b056b25f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include From 9656eeebf3f1dd05376c4c923797369746d9a618 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 16 Nov 2015 14:42:03 +0100 Subject: [PATCH 212/291] i2c: Revert "i2c: xiic: Do not reset controller before every transfer" Commit d701667bb331 ("i2c: xiic: Do not reset controller before every transfer") removed the reinitialization of the controller before the start of each transfer. Apparently this change is not safe to make and the commit results in random I2C bus failures. An easy way to trigger the issue is to run i2cdetect. Without the patch applied: 0 1 2 3 4 5 6 7 8 9 a b c d e f 00: -- -- -- -- -- -- -- -- -- -- -- -- -- 10: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 20: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 30: -- -- -- -- -- -- -- -- UU UU -- UU 3c -- -- UU 40: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 50: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 60: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 70: -- -- -- -- -- -- -- -- With the patch applied every other or so invocation: 0 1 2 3 4 5 6 7 8 9 a b c d e f 00: 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10: 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20: 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f 30: -- -- -- -- -- -- -- -- UU UU -- UU 3c -- -- UU 40: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 50: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 60: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 70: -- -- -- -- -- -- -- -- So revert the commit for now. Fixes: d701667bb331 ("i2c: xiic: Do not reset controller before every transfer") Signed-off-by: Lars-Peter Clausen Acked-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index e23a7b068c60..705cf69ccf42 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -662,6 +662,9 @@ static void __xiic_start_xfer(struct xiic_i2c *i2c) static void xiic_start_xfer(struct xiic_i2c *i2c) { + spin_lock(&i2c->lock); + xiic_reinit(i2c); + spin_unlock(&i2c->lock); __xiic_start_xfer(i2c); } From d0fe5258e6be609dab7e53335d845818684d1dcf Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 16 Nov 2015 14:42:04 +0100 Subject: [PATCH 213/291] i2c: xiic: Prevent concurrent running of the IRQ handler and __xiic_start_xfer() Prior to commit e6c9a037bc8a ("i2c: xiic: Remove the disabling of interrupts") IRQs where disabled when the initial __xiic_start_xfer() was called. After the commit the interrupt is enabled while the function is running, this means it is possible for the interrupt to be triggered while the function is still running. When this happens the internal data structures get corrupted and undefined behavior can occur like the following crash: Internal error: Oops: 17 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 2040 Comm: i2cdetect Not tainted 4.0.0-02856-g047a308 #10956 Hardware name: Xilinx Zynq Platform task: ee0c9500 ti: e99a2000 task.ti: e99a2000 PC is at __xiic_start_xfer+0x6c4/0x7c8 LR is at __xiic_start_xfer+0x690/0x7c8 pc : [] lr : [] psr: 800f0013 sp : e99a3da8 ip : 00000000 fp : 00000000 r10: 00000001 r9 : 600f0013 r8 : f0180000 r7 : f0180000 r6 : c064e444 r5 : 00000017 r4 : ee031010 r3 : 00000000 r2 : 00000000 r1 : 600f0013 r0 : 0000000f Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 18c5387d Table: 29a5404a DAC: 00000015 Process i2cdetect (pid: 2040, stack limit = 0xe99a2210) Stack: (0xe99a3da8 to 0xe99a4000) 3da0: ee031010 00000000 00000001 ee031020 ee031224 c02bc5ec 3dc0: ee34c604 00000000 ee0c9500 e99a3dcc e99a3dd0 e99a3dd0 e99a3dd8 c069f0e8 3de0: 00000000 ee031020 c064e100 ffff90bb e99a3e48 c02b6590 ee031020 00000001 3e00: e99a3e48 ee031020 00000000 e99a3e63 00000001 c02b6ec4 00000000 00000000 3e20: 00000000 c02b7320 e99a3ef0 00000000 00000000 e99e3df0 00000000 00000000 3e40: 00000103 2814575f 0000003e c00a0000 e99a3e85 0001003e ee0c0000 e99a3e63 3e60: eefd3578 c064e61c ee0c9500 c0041e04 0000056c e9a56db8 00006e5a b6f5c000 3e80: ee0c9548 eefd0040 00000001 eefd3540 ee0c9500 eefd39a0 c064b540 ee0c9500 3ea0: 00000000 ee92b000 00000000 bef4862c ee34c600 e99ecdc0 00000720 00000003 3ec0: e99a2000 00000000 00000000 c02b8b30 00000000 00000000 00000000 e99a3f24 3ee0: b6e80000 00000000 00000000 c04257e8 00000000 e99a3f24 c02b8f08 00000703 3f00: 00000003 c02116bc ee935300 00000000 bef4862c ee34c600 e99ecdc0 c02b91f0 3f20: e99ecdc0 00000720 bef4862c eeb725f8 e99ecdc0 c00c9e2c 00000003 00000003 3f40: ee248dc0 00000000 ee248dc8 00000002 eeb7c1a8 00000000 00000000 c00bb360 3f60: 00000000 00000000 00000003 ee248dc0 bef4862c e99ecdc0 e99ecdc0 00000720 3f80: 00000003 e99a2000 00000000 c00c9f68 00000000 00000000 b6f22000 00000036 3fa0: c000dfa4 c000de20 00000000 00000000 00000003 00000720 bef4862c bef4862c 3fc0: 00000000 00000000 b6f22000 00000036 00000000 00000000 b6f60000 00000000 3fe0: 00013040 bef48614 00008cab b6ecdbe6 400f0030 00000003 2f7fd821 2f7fdc21 [] (__xiic_start_xfer) from [] (xiic_xfer+0x94/0x168) [] (xiic_xfer) from [] (__i2c_transfer+0x4c/0x7c) [] (__i2c_transfer) from [] (i2c_transfer+0x9c/0xc4) [] (i2c_transfer) from [] (i2c_smbus_xfer+0x3a0/0x4ec) [] (i2c_smbus_xfer) from [] (i2cdev_ioctl_smbus+0xb0/0x214) [] (i2cdev_ioctl_smbus) from [] (i2cdev_ioctl+0xa0/0x1d4) [] (i2cdev_ioctl) from [] (do_vfs_ioctl+0x4b0/0x5b8) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) Code: e283300c e5843210 eafffe64 e5943210 (e1d320b4) The issue can easily be reproduced by performing I2C access under high system load or IO load. To fix the issue protect the invocation to __xiic_start_xfer() form xiic_start_xfer() with the same lock that is used to protect the interrupt handler. Fixes: e6c9a037bc8a ("i2c: xiic: Remove the disabling of interrupts") Signed-off-by: Lars-Peter Clausen Reviewed-by: Shubhrajyoti Datta Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xiic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 705cf69ccf42..0b20449e48cf 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -664,9 +664,8 @@ static void xiic_start_xfer(struct xiic_i2c *i2c) { spin_lock(&i2c->lock); xiic_reinit(i2c); - spin_unlock(&i2c->lock); - __xiic_start_xfer(i2c); + spin_unlock(&i2c->lock); } static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) From c18fba23061f16dde128e10d4869ba4e88e0e81a Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 12 Nov 2015 15:42:26 +0200 Subject: [PATCH 214/291] i2c: fix wakeup irq parsing This patch fixes obvious copy-past error in wake up irq parsing code which leads to the fact that dev_pm_set_wake_irq() will be called with wrong IRQ number when "wakeup" IRQ is not defined in DT. Fixes: 3fffd1283927 ("i2c: allow specifying separate wakeup interrupt in device tree") Signed-off-by: Grygorii Strashko Acked-by: Dmitry Torokhov Signed-off-by: Wolfram Sang Cc: # v4.3 --- drivers/i2c/i2c-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 040af5cc8143..ba8eb087f224 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -715,7 +715,7 @@ static int i2c_device_probe(struct device *dev) if (wakeirq > 0 && wakeirq != client->irq) status = dev_pm_set_dedicated_wake_irq(dev, wakeirq); else if (client->irq > 0) - status = dev_pm_set_wake_irq(dev, wakeirq); + status = dev_pm_set_wake_irq(dev, client->irq); else status = 0; From cdc5a3110e7c3ae793f367285789a6bc39c962dc Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Thu, 5 Nov 2015 11:40:25 -0800 Subject: [PATCH 215/291] i2c: i801: add Intel Lewisburg device IDs Adding Intel codename Lewisburg platform device IDs for SMBus. Signed-off-by: Alexandra Yates Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801 | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index 6a4b1af724f8..1bba38dd2637 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -32,6 +32,7 @@ Supported adapters: * Intel Sunrise Point-LP (PCH) * Intel DNV (SOC) * Intel Broxton (SOC) + * Intel Lewisburg (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e24c2b680b47..7b0aa82ea38b 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -126,6 +126,7 @@ config I2C_I801 Sunrise Point-LP (PCH) DNV (SOC) Broxton (SOC) + Lewisburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index c306751ceadb..f62d69799a9c 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -62,6 +62,8 @@ * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes * Broxton (SOC) 0x5ad4 32 hard yes yes yes + * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes + * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -206,6 +208,8 @@ #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS 0x9d23 #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 +#define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS 0xa1a3 +#define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223 struct i801_mux_config { char *gpio_chip; @@ -869,6 +873,8 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) }, { 0, } }; From 480fc0db819d706ea5608545a12d33cf8d5a31ab Mon Sep 17 00:00:00 2001 From: Wenwei Tao Date: Fri, 20 Nov 2015 13:47:53 +0100 Subject: [PATCH 216/291] lightnvm: wrong return value and redundant free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return value should be non-zero under error conditions. Remove nvme_free(dev) to avoid free dev more than once. Signed-off-by: Wenwei Tao Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 8a556f3f36bb..bed47e7249a9 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -222,14 +222,13 @@ static void nvm_free(struct nvm_dev *dev) static int nvm_init(struct nvm_dev *dev) { struct nvmm_type *mt; - int ret = 0; + int ret = -EINVAL; if (!dev->q || !dev->ops) - return -EINVAL; + return ret; if (dev->ops->identity(dev->q, &dev->identity)) { pr_err("nvm: device could not be identified\n"); - ret = -EINVAL; goto err; } @@ -275,7 +274,6 @@ static int nvm_init(struct nvm_dev *dev) dev->nr_chnls); return 0; err: - nvm_free(dev); pr_err("nvm: failed to initialize nvm\n"); return ret; } From 93e70c1f2883f2db2d6a1f339d0e26f00b138e4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 20 Nov 2015 13:47:54 +0100 Subject: [PATCH 217/291] lightnvm: missing free on init error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If either max_phys_sect is out of bound, the nvm_dev structure is not freed. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index bed47e7249a9..f61d325fd978 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -313,11 +313,13 @@ int nvm_register(struct request_queue *q, char *disk_name, "ppalist"); if (!dev->ppalist_pool) { pr_err("nvm: could not create ppa pool\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err_init; } } else if (dev->ops->max_phys_sect > 256) { pr_info("nvm: max sectors supported is 256.\n"); - return -EINVAL; + ret = -EINVAL; + goto err_init; } down_write(&nvm_lock); From 47b3115ae7b799be8b77b0f024215ad4f68d6460 Mon Sep 17 00:00:00 2001 From: Wenwei Tao Date: Fri, 20 Nov 2015 13:47:55 +0100 Subject: [PATCH 218/291] nvme: lightnvm: use admin queues for admin cmds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the Open-Channel SSD Specification, the NVMe-NVM admin commands use vendor specific opcodes of NVMe, so use the NVMe admin queue to dispatch these commands. Signed-off-by: Wenwei Tao Updated by me to include set bad block table as well and also use the admin queue for l2p len calculation. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 7e82fe33d6f8..9202d1a468d0 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -276,6 +276,7 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id) { struct nvme_ns *ns = q->queuedata; + struct nvme_dev *dev = ns->dev; struct nvme_nvm_id *nvme_nvm_id; struct nvme_nvm_command c = {}; int ret; @@ -288,8 +289,8 @@ static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id) if (!nvme_nvm_id) return -ENOMEM; - ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id, - sizeof(struct nvme_nvm_id)); + ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, + nvme_nvm_id, sizeof(struct nvme_nvm_id)); if (ret) { ret = -EIO; goto out; @@ -315,7 +316,7 @@ static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb, struct nvme_ns *ns = q->queuedata; struct nvme_dev *dev = ns->dev; struct nvme_nvm_command c = {}; - u32 len = queue_max_hw_sectors(q) << 9; + u32 len = queue_max_hw_sectors(dev->admin_q) << 9; u32 nlb_pr_rq = len / sizeof(u64); u64 cmd_slba = slba; void *entries; @@ -333,8 +334,8 @@ static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb, c.l2p.slba = cpu_to_le64(cmd_slba); c.l2p.nlb = cpu_to_le32(cmd_nlb); - ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, - entries, len); + ret = nvme_submit_sync_cmd(dev->admin_q, + (struct nvme_command *)&c, entries, len); if (ret) { dev_err(dev->dev, "L2P table transfer failed (%d)\n", ret); @@ -375,7 +376,8 @@ static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa, if (!bb_tbl) return -ENOMEM; - ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_tbl, tblsz); + ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, + bb_tbl, tblsz); if (ret) { dev_err(dev->dev, "get bad block table failed (%d)\n", ret); ret = -EIO; @@ -427,7 +429,8 @@ static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd, c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1); c.set_bb.value = type; - ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); + ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, + NULL, 0); if (ret) dev_err(dev->dev, "set bad block table failed (%d)\n", ret); return ret; From 0b59733b95f9d7af6bee6e6a4d0d444eb694c514 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:56 +0100 Subject: [PATCH 219/291] lightnvm: keep track of block counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintain number of in use blocks, free blocks, and bad blocks in a per lun basis. This allows the upper layers to get information about the state of each lun. Also, account for blocks reserved to the device on the free block count. nr_free_blocks matches now the actual number of blocks on the free list when the device is booted. Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/gennvm.c | 14 +++++++++++++- include/linux/lightnvm.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index c0d0eb2357a8..43c01e0af887 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -60,6 +60,8 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) lun->vlun.lun_id = i % dev->luns_per_chnl; lun->vlun.chnl_id = i / dev->luns_per_chnl; lun->vlun.nr_free_blocks = dev->blks_per_lun; + lun->vlun.nr_inuse_blocks = 0; + lun->vlun.nr_bad_blocks = 0; } return 0; } @@ -87,6 +89,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, } list_move_tail(&blk->list, &lun->bb_list); + lun->vlun.nr_bad_blocks++; } return 0; @@ -139,6 +142,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) list_move_tail(&blk->list, &lun->used_list); blk->type = 1; lun->vlun.nr_free_blocks--; + lun->vlun.nr_inuse_blocks++; } } @@ -167,8 +171,10 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) block->id = cur_block_id++; /* First block is reserved for device */ - if (unlikely(lun_iter == 0 && blk_iter == 0)) + if (unlikely(lun_iter == 0 && blk_iter == 0)) { + lun->vlun.nr_free_blocks--; continue; + } list_add_tail(&block->list, &lun->free_list); } @@ -266,6 +272,7 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev, blk->type = 1; lun->vlun.nr_free_blocks--; + lun->vlun.nr_inuse_blocks++; spin_unlock(&vlun->lock); out: @@ -283,16 +290,21 @@ static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) case 1: list_move_tail(&blk->list, &lun->free_list); lun->vlun.nr_free_blocks++; + lun->vlun.nr_inuse_blocks--; blk->type = 0; break; case 2: list_move_tail(&blk->list, &lun->bb_list); + lun->vlun.nr_bad_blocks++; + lun->vlun.nr_inuse_blocks--; break; default: WARN_ON_ONCE(1); pr_err("gennvm: erroneous block type (%lu -> %u)\n", blk->id, blk->type); list_move_tail(&blk->list, &lun->bb_list); + lun->vlun.nr_bad_blocks++; + lun->vlun.nr_inuse_blocks--; } spin_unlock(&vlun->lock); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cbe288acb1de..831a20cf070c 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -213,7 +213,9 @@ struct nvm_lun { int lun_id; int chnl_id; + unsigned int nr_inuse_blocks; /* Number of used blocks */ unsigned int nr_free_blocks; /* Number of unused blocks */ + unsigned int nr_bad_blocks; /* Number of bad blocks */ struct nvm_block *blocks; spinlock_t lock; From 2fde0e482db2b43bb4ed0e9aebfbe78ebcbbf5a6 Mon Sep 17 00:00:00 2001 From: Javier Gonzalez Date: Fri, 20 Nov 2015 13:47:57 +0100 Subject: [PATCH 220/291] lightnvm: add free and bad lun info to show luns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add free block, used block, and bad block information to the show debug interface. This information is used to debug how targets track blocks. Also, change debug function name to make it more generic. Signed-off-by: Javier Gonzalez Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 2 +- drivers/lightnvm/gennvm.c | 19 ++++++++++++++----- include/linux/lightnvm.h | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index f61d325fd978..5178645ac42b 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -544,7 +544,7 @@ static int nvm_configure_show(const char *val) if (!dev->mt) return 0; - dev->mt->free_blocks_print(dev); + dev->mt->lun_info_print(dev); return 0; } diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 43c01e0af887..e20e74ec6b91 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -464,15 +464,24 @@ static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid) return &gn->luns[lunid].vlun; } -static void gennvm_free_blocks_print(struct nvm_dev *dev) +static void gennvm_lun_info_print(struct nvm_dev *dev) { struct gen_nvm *gn = dev->mp; struct gen_lun *lun; unsigned int i; - gennvm_for_each_lun(gn, lun, i) - pr_info("%s: lun%8u\t%u\n", - dev->name, i, lun->vlun.nr_free_blocks); + + gennvm_for_each_lun(gn, lun, i) { + spin_lock(&lun->vlun.lock); + + pr_info("%s: lun%8u\t%u\t%u\t%u\n", + dev->name, i, + lun->vlun.nr_free_blocks, + lun->vlun.nr_inuse_blocks, + lun->vlun.nr_bad_blocks); + + spin_unlock(&lun->vlun.lock); + } } static struct nvmm_type gennvm = { @@ -490,7 +499,7 @@ static struct nvmm_type gennvm = { .erase_blk = gennvm_erase_blk, .get_lun = gennvm_get_lun, - .free_blocks_print = gennvm_free_blocks_print, + .lun_info_print = gennvm_lun_info_print, }; static int __init gennvm_module_init(void) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 831a20cf070c..3db5552b17d5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -380,7 +380,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, unsigned long); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); -typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *); +typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); struct nvmm_type { const char *name; @@ -404,7 +404,7 @@ struct nvmm_type { nvmm_get_lun_fn *get_lun; /* Statistics */ - nvmm_free_blocks_print_fn *free_blocks_print; + nvmm_lun_info_print_fn *lun_info_print; struct list_head list; }; From 604e8c8da8854351496215d269c3fa93859e3fee Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 20 Nov 2015 08:38:13 -0700 Subject: [PATCH 221/291] NVMe: reap completion entries when deleting queue Make sure that there are no unprocesssed entries on a completion queue before deleting it, and check for validity of the CQ door bell before writing completions to it. This fixes problems with doing a sysfs reset of the device while it's handling IO. Tested-by: Jon Derrick Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 394fd1631cd0..930042fa2d69 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -968,7 +968,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (likely(nvmeq->cq_vector >= 0)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -2787,6 +2788,10 @@ static void nvme_del_queue_end(struct nvme_queue *nvmeq) { struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; nvme_put_dq(dq); + + spin_lock_irq(&nvmeq->q_lock); + nvme_process_cq(nvmeq); + spin_unlock_irq(&nvmeq->q_lock); } static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, From 8aeea03195ee6e33fcb00039c414eabfc37a4eb8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 10:46:49 +0100 Subject: [PATCH 222/291] mtip32xx: use formatting capability of kthread_create_on_node kthread_create_on_node takes format+args, so there's no need to do the pretty-printing in advance. Moreover, "mtip_svc_thd_99" (including its '\0') only just fits in 16 bytes, so if index could ever go above 99 we'd have a stack buffer overflow. Signed-off-by: Rasmus Villemoes Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a28a562f7b7f..3457ac8c03e2 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3810,7 +3810,6 @@ static int mtip_block_initialize(struct driver_data *dd) sector_t capacity; unsigned int index = 0; struct kobject *kobj; - unsigned char thd_name[16]; if (dd->disk) goto skip_create_disk; /* hw init done, before rebuild */ @@ -3958,10 +3957,9 @@ skip_create_disk: } start_service_thread: - sprintf(thd_name, "mtip_svc_thd_%02d", index); dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, - dd, dd->numa_node, "%s", - thd_name); + dd, dd->numa_node, + "mtip_svc_thd_%02d", index); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); From 02e2a5bfebe99edcf9d694575a75032d53fe1b73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 19 Nov 2015 17:18:54 -0800 Subject: [PATCH 223/291] mac: validate mac_partition is within sector If md->signature == MAC_DRIVER_MAGIC and md->block_size == 1023, a single 512 byte sector would be read (secsize / 512). However the partition structure would be located past the end of the buffer (secsize % 512). Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- block/partitions/mac.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/block/partitions/mac.c b/block/partitions/mac.c index c2c48ec64b27..621317ac4d59 100644 --- a/block/partitions/mac.c +++ b/block/partitions/mac.c @@ -32,7 +32,7 @@ int mac_partition(struct parsed_partitions *state) Sector sect; unsigned char *data; int slot, blocks_in_map; - unsigned secsize; + unsigned secsize, datasize, partoffset; #ifdef CONFIG_PPC_PMAC int found_root = 0; int found_root_goodness = 0; @@ -50,10 +50,14 @@ int mac_partition(struct parsed_partitions *state) } secsize = be16_to_cpu(md->block_size); put_dev_sector(sect); - data = read_part_sector(state, secsize/512, §); + datasize = round_down(secsize, 512); + data = read_part_sector(state, datasize / 512, §); if (!data) return -1; - part = (struct mac_partition *) (data + secsize%512); + partoffset = secsize % 512; + if (partoffset + sizeof(*part) > datasize) + return -1; + part = (struct mac_partition *) (data + partoffset); if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) { put_dev_sector(sect); return 0; /* not a MacOS disk */ From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 15:56:48 -0800 Subject: [PATCH 224/291] slab.h: sprinkle __assume_aligned attributes The various allocators return aligned memory. Telling the compiler that allows it to generate better code in many cases, for example when the return value is immediately passed to memset(). Some code does become larger, but at least we win twice as much as we lose: $ scripts/bloat-o-meter /tmp/vmlinux vmlinux add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145) An example of the different (and smaller) code can be seen in mm_alloc(). Before: : 48 8d 78 08 lea 0x8(%rax),%rdi : 48 89 c1 mov %rax,%rcx : 48 89 c2 mov %rax,%rdx : 48 c7 00 00 00 00 00 movq $0x0,(%rax) : 48 c7 80 48 03 00 00 movq $0x0,0x348(%rax) : 00 00 00 00 : 31 c0 xor %eax,%eax : 48 83 e7 f8 and $0xfffffffffffffff8,%rdi : 48 29 f9 sub %rdi,%rcx : 81 c1 50 03 00 00 add $0x350,%ecx : c1 e9 03 shr $0x3,%ecx : f3 48 ab rep stos %rax,%es:(%rdi) After: : 48 89 c2 mov %rax,%rdx : b9 6a 00 00 00 mov $0x6a,%ecx : 31 c0 xor %eax,%eax : 48 89 d7 mov %rdx,%rdi : f3 48 ab rep stos %rax,%es:(%rdi) So gcc's strategy is to do two possibly (but not really, of course) unaligned stores to the first and last word, then do an aligned rep stos covering the middle part with a little overlap. Maybe arches which do not allow unaligned stores gain even more. I don't know if gcc can actually make use of alignments greater than 8 for anything, so one could probably drop the __assume_xyz_alignment macros and just use __assume_aligned(8). The increases in code size are mostly caused by gcc deciding to opencode strlen() using the check-four-bytes-at-a-time trick when it knows the buffer is sufficiently aligned (one function grew by 200 bytes). Now it turns out that many of these strlen() calls showing up were in fact redundant, and they're gone from -next. Applying the two patches to next-20151001 bloat-o-meter instead says add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896) Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 7c82e3b307a3..96940772bb92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -157,6 +157,24 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + +/* + * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN + * aligned pointers. + */ +#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) +#define __assume_page_alignment __assume_aligned(PAGE_SIZE) + /* * Kmalloc array related definitions */ @@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size); + int node, size_t size) __assume_slab_alignment; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) @@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - struct memcg_cache_array { struct rcu_head rcu; struct kmem_cache *entries[0]; From dd7d664a2bcf93634a57d208e8e480ab7448c107 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Fri, 20 Nov 2015 15:56:51 -0800 Subject: [PATCH 225/291] MAINTAINERS: add Moritz as reviewer for FPGA Manager Framework Nominate myself as Reviewer. Signed-off-by: Moritz Fischer Acked-by: Alan Tull Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b16bffabe70a..0f5fa0f978c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4409,6 +4409,7 @@ K: fmc_d.*register FPGA MANAGER FRAMEWORK M: Alan Tull +R: Moritz Fischer S: Maintained F: drivers/fpga/ F: include/linux/fpga/fpga-mgr.h From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 20 Nov 2015 15:56:53 -0800 Subject: [PATCH 226/291] configfs: allow dynamic group creation This patchset introduces IIO software triggers, offers a way of configuring them via configfs and adds the IIO hrtimer based interrupt source to be used with software triggers. The architecture is now split in 3 parts, to remove all IIO trigger specific parts from IIO configfs core: (1) IIO configfs - creates the root of the IIO configfs subsys. (2) IIO software triggers - software trigger implementation, dynamically creating /config/iio/triggers group. (3) IIO hrtimer trigger - is the first interrupt source for software triggers (with syfs to follow). Each trigger type can implement its own set of attributes. Lockdep seems to be happy with the locking in configfs patch. This patch (of 5): We don't want to hardcode default groups at subsystem creation time. We export: * configfs_register_group * configfs_unregister_group to allow drivers to programatically create/destroy groups later, after module init time. This is needed for IIO configfs support. (akpm: the other 4 patches to be merged via the IIO tree) Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Acked-by: Joel Becker Cc: Hartmut Knaack Cc: Octavian Purdila Cc: Paul Bolle Cc: Adriana Reus Cc: Cristina Opriceana Cc: Peter Meerwald Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/configfs/dir.c | 110 +++++++++++++++++++++++++++++++++++++++ include/linux/configfs.h | 10 ++++ 2 files changed, 120 insertions(+) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = { .iterate = configfs_readdir, }; +/** + * configfs_register_group - creates a parent-child relation between two groups + * @parent_group: parent group + * @group: child group + * + * link groups, creates dentry for the child and attaches it to the + * parent dentry. + * + * Return: 0 on success, negative errno code on error + */ +int configfs_register_group(struct config_group *parent_group, + struct config_group *group) +{ + struct configfs_subsystem *subsys = parent_group->cg_subsys; + struct dentry *parent; + int ret; + + mutex_lock(&subsys->su_mutex); + link_group(parent_group, group); + mutex_unlock(&subsys->su_mutex); + + parent = parent_group->cg_item.ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + ret = create_default_group(parent_group, group); + if (!ret) { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + } + mutex_unlock(&d_inode(parent)->i_mutex); + return ret; +} +EXPORT_SYMBOL(configfs_register_group); + +/** + * configfs_unregister_group() - unregisters a child group from its parent + * @group: parent group to be unregistered + * + * Undoes configfs_register_group() + */ +void configfs_unregister_group(struct config_group *group) +{ + struct configfs_subsystem *subsys = group->cg_subsys; + struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + spin_lock(&configfs_dirent_lock); + configfs_detach_prep(dentry, NULL); + spin_unlock(&configfs_dirent_lock); + + configfs_detach_group(&group->cg_item); + d_inode(dentry)->i_flags |= S_DEAD; + dont_mount(dentry); + d_delete(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); + + dput(dentry); + + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); +} +EXPORT_SYMBOL(configfs_unregister_group); + +/** + * configfs_register_default_group() - allocates and registers a child group + * @parent_group: parent group + * @name: child group name + * @item_type: child item type description + * + * boilerplate to allocate and register a child group with its parent. We need + * kzalloc'ed memory because child's default_group is initially empty. + * + * Return: allocated config group or ERR_PTR() on error + */ +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type) +{ + int ret; + struct config_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + config_group_init_type_name(group, name, item_type); + + ret = configfs_register_group(parent_group, group); + if (ret) { + kfree(group); + return ERR_PTR(ret); + } + return group; +} +EXPORT_SYMBOL(configfs_register_default_group); + +/** + * configfs_unregister_default_group() - unregisters and frees a child group + * @group: the group to act on + */ +void configfs_unregister_default_group(struct config_group *group) +{ + configfs_unregister_group(group); + kfree(group); +} +EXPORT_SYMBOL(configfs_unregister_default_group); + int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index a8a335b7fce0..758a029011b1 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +int configfs_register_group(struct config_group *parent_group, + struct config_group *group); +void configfs_unregister_group(struct config_group *group); + +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type); +void configfs_unregister_default_group(struct config_group *group); + /* These functions can sleep and can alloc with GFP_KERNEL */ /* WARNING: These cannot be called underneath configfs callbacks!! */ int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); From 1491e30ed1a741009d1d38f9285f7a29e6c05c78 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Nov 2015 15:56:56 -0800 Subject: [PATCH 227/291] ncpfs: don't allow negative timeouts This code causes a static checker warning because it's a user controlled variable where we cap the upper bound but not the lower bound. Let's return an -EINVAL for negative timeouts. [akpm@linux-foundation.org: remove unneeded `else'] Signed-off-by: Dan Carpenter Reviewed-by: Jan Kara Cc: Petr Vandrovec Cc: David Howells Cc: Jan Kara Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ncpfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 79b113048eac..0a3f9b594602 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg switch (rqdata.cmd) { case NCP_LOCK_EX: case NCP_LOCK_SH: + if (rqdata.timeout < 0) + return -EINVAL; if (rqdata.timeout == 0) rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) From 429d48622cb08b7b4908cb63b43a135be9b3db09 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 20 Nov 2015 15:56:59 -0800 Subject: [PATCH 228/291] tools/vm/page-types.c: support KPF_IDLE PageIdle is exported in include/uapi/linux/kernel-page-flags.h, so let's make page-types.c tool handle it. Signed-off-by: Naoya Horiguchi Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/page-types.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index bcf5ec760eb9..5a6016224bb9 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -128,6 +128,7 @@ static const char * const page_flag_names[] = { [KPF_THP] = "t:thp", [KPF_BALLOON] = "o:balloon", [KPF_ZERO_PAGE] = "z:zero_page", + [KPF_IDLE] = "i:idle_page", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", From 7511c3ede752e6dd67df20779b4e11effe102637 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 20 Nov 2015 15:57:02 -0800 Subject: [PATCH 229/291] mm: vmalloc: don't remove inexistent guard hole in remove_vm_area() Commit 71394fe50146 ("mm: vmalloc: add flag preventing guard hole allocation") missed a spot. Currently remove_vm_area() decreases vm->size to "remove" the guard hole page, even when it isn't present. All but one users just free the vm_struct rigth away and never access vm->size anyway. Don't touch the size in remove_vm_area() and have __vunmap() use the proper get_vm_area_size() helper. Signed-off-by: Jerome Marchand Acked-by: Andrey Ryabinin Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d04563480c94..8e3c9c5a3042 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1443,7 +1443,6 @@ struct vm_struct *remove_vm_area(const void *addr) vmap_debug_free_range(va->va_start, va->va_end); kasan_free_shadow(vm); free_unmap_vmap_area(va); - vm->size -= PAGE_SIZE; return vm; } @@ -1468,8 +1467,8 @@ static void __vunmap(const void *addr, int deallocate_pages) return; } - debug_check_no_locks_freed(addr, area->size); - debug_check_no_obj_freed(addr, area->size); + debug_check_no_locks_freed(addr, get_vm_area_size(area)); + debug_check_no_obj_freed(addr, get_vm_area_size(area)); if (deallocate_pages) { int i; From 1a763615688b891246c5b0a932d7a95fea4c1a68 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Fri, 20 Nov 2015 15:57:04 -0800 Subject: [PATCH 230/291] mm: loosen MADV_NOHUGEPAGE to enable Qemu postcopy on s390 MADV_NOHUGEPAGE processing is too restrictive. kvm already disables hugepage but hugepage_madvise() takes the error path when we ask to turn on the MADV_NOHUGEPAGE bit and the bit is already on. This causes Qemu's new postcopy migration feature to fail on s390 because its first action is to madvise the guest address space as NOHUGEPAGE. This patch modifies the code so that the operation succeeds without error now. For consistency reasons do the same for MADV_HUGEPAGE. Signed-off-by: Jason J. Herne Reviewed-by: Andrea Arcangeli Acked-by: Christian Borntraeger Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c29ddebc8705..62fe06bb7d04 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2009,7 +2009,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) + if (*vm_flags & VM_NO_THP) return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; @@ -2025,7 +2025,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) + if (*vm_flags & VM_NO_THP) return -EINVAL; *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; From 1a47de6e4a8dc2aaf3c3fb544b60730b74abe0f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 20 Nov 2015 15:57:07 -0800 Subject: [PATCH 231/291] various: fix pci_set_dma_mask return value checking pci_set_dma_mask returns a negative errno value, not a bool like pci_dma_supported. This of course was just a giant test for attention :) Signed-off-by: Christoph Hellwig Reported-by: Jongman Heo Tested-by: Jongman Heo [pcnet32] Acked-by: Mauro Carvalho Chehab Cc: Hans Verkuil Cc: Antti Palosaari Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/pci/cx23885/cx23885-core.c | 4 ++-- drivers/media/pci/cx25821/cx25821-core.c | 3 ++- drivers/media/pci/cx88/cx88-alsa.c | 4 ++-- drivers/media/pci/cx88/cx88-mpeg.c | 3 ++- drivers/media/pci/cx88/cx88-video.c | 4 ++-- drivers/media/pci/netup_unidvb/netup_unidvb_core.c | 2 +- drivers/media/pci/saa7134/saa7134-core.c | 4 ++-- drivers/media/pci/saa7164/saa7164-core.c | 4 ++-- drivers/media/pci/tw68/tw68-core.c | 4 ++-- drivers/net/ethernet/amd/pcnet32.c | 5 +++-- 10 files changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c index 35759a91d47d..e8f847226a19 100644 --- a/drivers/media/pci/cx23885/cx23885-core.c +++ b/drivers/media/pci/cx23885/cx23885-core.c @@ -1992,9 +1992,9 @@ static int cx23885_initdev(struct pci_dev *pci_dev, (unsigned long long)pci_resource_start(pci_dev, 0)); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev, 0xffffffff)) { + err = pci_set_dma_mask(pci_dev, 0xffffffff); + if (err) { printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - err = -EIO; goto fail_context; } diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index dbc695f32760..0042803a9de7 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -1319,7 +1319,8 @@ static int cx25821_initdev(struct pci_dev *pci_dev, dev->pci_lat, (unsigned long long)dev->base_io_addr); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev, 0xffffffff)) { + err = pci_set_dma_mask(pci_dev, 0xffffffff); + if (err) { pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); err = -EIO; goto fail_irq; diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c index 0ed1b6530374..1b5268f9bb24 100644 --- a/drivers/media/pci/cx88/cx88-alsa.c +++ b/drivers/media/pci/cx88/cx88-alsa.c @@ -890,9 +890,9 @@ static int snd_cx88_create(struct snd_card *card, struct pci_dev *pci, return err; } - if (!pci_set_dma_mask(pci,DMA_BIT_MASK(32))) { + err = pci_set_dma_mask(pci,DMA_BIT_MASK(32)); + if (err) { dprintk(0, "%s/1: Oops: no 32bit PCI DMA ???\n",core->name); - err = -EIO; cx88_core_put(core, pci); return err; } diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c index 9db7767d1fe0..f34c229f9b37 100644 --- a/drivers/media/pci/cx88/cx88-mpeg.c +++ b/drivers/media/pci/cx88/cx88-mpeg.c @@ -393,7 +393,8 @@ static int cx8802_init_common(struct cx8802_dev *dev) if (pci_enable_device(dev->pci)) return -EIO; pci_set_master(dev->pci); - if (!pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32))) { + err = pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32)); + if (err) { printk("%s/2: Oops: no 32bit PCI DMA ???\n",dev->core->name); return -EIO; } diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 0de1ad5a977d..aef9acf351f6 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -1314,9 +1314,9 @@ static int cx8800_initdev(struct pci_dev *pci_dev, dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0)); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32))) { + err = pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32)); + if (err) { printk("%s/0: Oops: no 32bit PCI DMA ???\n",core->name); - err = -EIO; goto fail_core; } dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev); diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c index 60b2d462f98d..3fdbd81b5580 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c @@ -810,7 +810,7 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev, "%s(): board vendor 0x%x, revision 0x%x\n", __func__, board_vendor, board_revision); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev, 0xffffffff)) { + if (pci_set_dma_mask(pci_dev, 0xffffffff) < 0) { dev_err(&pci_dev->dev, "%s(): 32bit PCI DMA is not supported\n", __func__); goto pci_detect_err; diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c index e79d63eb774e..f720cea80e28 100644 --- a/drivers/media/pci/saa7134/saa7134-core.c +++ b/drivers/media/pci/saa7134/saa7134-core.c @@ -951,9 +951,9 @@ static int saa7134_initdev(struct pci_dev *pci_dev, pci_name(pci_dev), dev->pci_rev, pci_dev->irq, dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0)); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { + err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)); + if (err) { pr_warn("%s: Oops: no 32bit PCI DMA ???\n", dev->name); - err = -EIO; goto fail1; } diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c index 8f36b48ef733..8bbd092fbe1d 100644 --- a/drivers/media/pci/saa7164/saa7164-core.c +++ b/drivers/media/pci/saa7164/saa7164-core.c @@ -1264,9 +1264,9 @@ static int saa7164_initdev(struct pci_dev *pci_dev, pci_set_master(pci_dev); /* TODO */ - if (!pci_set_dma_mask(pci_dev, 0xffffffff)) { + err = pci_set_dma_mask(pci_dev, 0xffffffff); + if (err) { printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name); - err = -EIO; goto fail_irq; } diff --git a/drivers/media/pci/tw68/tw68-core.c b/drivers/media/pci/tw68/tw68-core.c index 8c5655d351d3..4e77618fbb2b 100644 --- a/drivers/media/pci/tw68/tw68-core.c +++ b/drivers/media/pci/tw68/tw68-core.c @@ -257,9 +257,9 @@ static int tw68_initdev(struct pci_dev *pci_dev, dev->name, pci_name(pci_dev), dev->pci_rev, pci_dev->irq, dev->pci_lat, (u64)pci_resource_start(pci_dev, 0)); pci_set_master(pci_dev); - if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { + err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32)); + if (err) { pr_info("%s: Oops: no 32bit PCI DMA ???\n", dev->name); - err = -EIO; goto fail1; } diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index e2afabf3a465..7ccebae9cb48 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1500,10 +1500,11 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENODEV; } - if (!pci_set_dma_mask(pdev, PCNET32_DMA_MASK)) { + err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK); + if (err) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); - return -ENODEV; + return err; } if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) From 50e55bf626ad3ebbca45c0c0d03eb1710a139638 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 20 Nov 2015 15:57:10 -0800 Subject: [PATCH 232/291] mm/page-writeback.c: initialize m_dirty to avoid compile warning When building kernel with gcc 5.2, the below warning is raised: mm/page-writeback.c: In function 'balance_dirty_pages.isra.10': mm/page-writeback.c:1545:17: warning: 'm_dirty' may be used uninitialized in this function [-Wmaybe-uninitialized] unsigned long m_dirty, m_thresh, m_bg_thresh; The m_dirty{thresh, bg_thresh} are initialized in the block of "if (mdtc)", so if mdts is null, they won't be initialized before being used. Initialize m_dirty to zero, also initialize m_thresh and m_bg_thresh to keep consistency. They are used later by if condition: !mdtc || m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh) If mdtc is null, dirty_freerun_ceiling will not be called at all, so the initialization will not change any behavior other than just ceasing the compile warning. (akpm: the patch actually reduces .text size by ~20 bytes on gcc-4.x.y) [akpm@linux-foundation.org: add comment] Signed-off-by: Yang Shi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2c90357c34ea..3e4d65445fa7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1542,7 +1542,9 @@ static void balance_dirty_pages(struct address_space *mapping, for (;;) { unsigned long now = jiffies; unsigned long dirty, thresh, bg_thresh; - unsigned long m_dirty, m_thresh, m_bg_thresh; + unsigned long m_dirty = 0; /* stop bogus uninit warnings */ + unsigned long m_thresh = 0; + unsigned long m_bg_thresh = 0; /* * Unstable writes are a feature of certain networked From 1817889e3b2cc1db8abb595712095129ff9156c1 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 20 Nov 2015 15:57:13 -0800 Subject: [PATCH 233/291] mm/hugetlbfs: fix bugs in fallocate hole punch of areas with holes Hugh Dickins pointed out problems with the new hugetlbfs fallocate hole punch code. These problems are in the routine remove_inode_hugepages and mostly occur in the case where there are holes in the range of pages to be removed. These holes could be the result of a previous hole punch or simply sparse allocation. The current code could access pages outside the specified range. remove_inode_hugepages handles both hole punch and truncate operations. Page index handling was fixed/cleaned up so that the loop index always matches the page being processed. The code now only makes a single pass through the range of pages as it was determined page faults could not race with truncate. A cond_resched() was added after removing up to PAGEVEC_SIZE pages. Some totally unnecessary code in hugetlbfs_fallocate() that remained from early development was also removed. Tested with fallocate tests submitted here: http://librelist.com/browser//libhugetlbfs/2015/6/25/patch-tests-add-tests-for-fallocate-system-call/ And, some ftruncate tests under development Fixes: b5cec28d36f5 ("hugetlbfs: truncate_hugepages() takes a range of pages") Signed-off-by: Mike Kravetz Acked-by: Hugh Dickins Cc: Dave Hansen Cc: Naoya Horiguchi Cc: Davidlohr Bueso Cc: "Hillf Danton" Cc: [4.3] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 65 ++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316adb968b65..de4bdfac0cec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page) * truncation is indicated by end of range being LLONG_MAX * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv - * maps and global counts. + * maps and global counts. Page faults can not race with truncation + * in this routine. hugetlb_no_page() prevents page faults in the + * truncated range. It checks i_size before allocation, and again after + * with the page table lock for the page held. The same lock must be + * acquired to unmap a page. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map * deleted. The region/reserv map for ranges without associated - * pages are not modified. + * pages are not modified. Page faults can race with hole punch. + * This is indicated if we find a mapped page. * Note: If the passed end of range value is beyond the end of file, but * not LLONG_MAX this routine still performs a hole punch operation. */ @@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, next = start; while (next < end) { /* - * Make sure to never grab more pages that we - * might possibly need. + * Don't grab more pages than the number left in the range. */ if (end - next < lookup_nr) lookup_nr = end - next; /* - * This pagevec_lookup() may return pages past 'end', - * so we must check for page->index > end. + * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) { - if (next == start) - break; - next = start; - continue; - } + if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) + break; for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; u32 hash; + /* + * The page (index) could be beyond end. This is + * only possible in the punch hole case as end is + * max page offset in the truncate case. + */ + next = page->index; + if (next >= end) + break; + hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, mapping, next, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); lock_page(page); - if (page->index >= end) { - unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); - next = end; /* we are done */ - break; - } - - /* - * If page is mapped, it was faulted in after being - * unmapped. Do nothing in this race case. In the - * normal case page is not mapped. - */ - if (!page_mapped(page)) { + if (likely(!page_mapped(page))) { bool rsv_on_error = !PagePrivate(page); /* * We must free the huge page and remove @@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, hugetlb_fix_reserve_counts( inode, rsv_on_error); } + } else { + /* + * If page is mapped, it was faulted in after + * being unmapped. It indicates a race between + * hole punch and page fault. Do nothing in + * this case. Getting here in a truncate + * operation is a bug. + */ + BUG_ON(truncate_op); } - if (page->index > next) - next = page->index; - - ++next; unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); } + ++next; huge_pagevec_release(&pvec); + cond_resched(); } if (truncate_op) @@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); inode->i_ctime = CURRENT_TIME; - spin_lock(&inode->i_lock); - inode->i_private = NULL; - spin_unlock(&inode->i_lock); out: mutex_unlock(&inode->i_mutex); return error; From 928a477102c4fc6739883415b66987207e3502f4 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 20 Nov 2015 15:57:15 -0800 Subject: [PATCH 234/291] fat: fix fake_offset handling on error path For the root directory, . and .. are faked (using dir_emit_dots()) and ctx->pos is reset from 2 to 0. A corrupted root directory could cause fat_get_entry() to fail, but ->iterate() (fat_readdir()) reports progress to the VFS (with ctx->pos rewound to 0), so any following calls to ->iterate() continue to return the same entries again and again. The result is that userspace will never see the end of the directory, causing e.g. 'ls' to hang in a getdents() loop. [hirofumi@mail.parknet.co.jp: cleanup and make sure to correct fake_offset] Reported-by: Vegard Nossum Tested-by: Vegard Nossum Signed-off-by: Richard Weinberger Signed-off-by: OGAWA Hirofumi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4afc4d9d2e41..8b2127ffb226 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -610,9 +610,9 @@ parse_record: int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); if (status < 0) { - ctx->pos = cpos; + bh = NULL; ret = status; - goto out; + goto end_of_dir; } else if (status == PARSE_INVALID) goto record_end; else if (status == PARSE_NOT_LONGNAME) @@ -654,8 +654,9 @@ parse_record: fill_len = short_len; start_filldir: - if (!fake_offset) - ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + if (fake_offset && ctx->pos < 2) + ctx->pos = 2; if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { if (!dir_emit_dot(file, ctx)) @@ -681,14 +682,19 @@ record_end: fake_offset = 0; ctx->pos = cpos; goto get_new; + end_of_dir: - ctx->pos = cpos; + if (fake_offset && cpos < 2) + ctx->pos = 2; + else + ctx->pos = cpos; fill_failed: brelse(bh); if (unicode) __putname(unicode); out: mutex_unlock(&sbi->s_lock); + return ret; } From 459372545c9c0d6f491e280dccc8a54a61b60e56 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 Nov 2015 15:57:18 -0800 Subject: [PATCH 235/291] kasan: fix kmemleak false-positive in kasan_module_alloc() Kmemleak reports the following leak: unreferenced object 0xfffffbfff41ea000 (size 20480): comm "modprobe", pid 65199, jiffies 4298875551 (age 542.568s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xc0 [] __vmalloc_node_range+0x4b8/0x740 [] kasan_module_alloc+0x72/0xc0 [] module_alloc+0x78/0xb0 [] module_alloc_update_bounds+0x14/0x70 [] layout_and_allocate+0x16f4/0x3c90 [] load_module+0x2ff/0x6690 [] SyS_finit_module+0x136/0x170 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff kasan_module_alloc() allocates shadow memory for module and frees it on module unloading. It doesn't store the pointer to allocated shadow memory because it could be calculated from the shadowed address, i.e. kasan_mem_to_shadow(addr). Since kmemleak cannot find pointer to allocated shadow, it thinks that memory leaked. Use kmemleak_ignore() to tell kmemleak that this is not a leak and shadow memory doesn't contain any pointers. Signed-off-by: Andrey Ryabinin Acked-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d41b21bce6a0..bc0a8d8b8f42 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -444,6 +445,7 @@ int kasan_module_alloc(void *addr, size_t size) if (ret) { find_vm_area(addr)->flags |= VM_KASAN; + kmemleak_ignore(ret); return 0; } From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: [PATCH 236/291] kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static do not pollute the global namespace. But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn a xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker choose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. :-) Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 1 - kernel/signal.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index ab1e0392b5ac..92557bbce7e7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int sigsuspend(sigset_t *); struct sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION diff --git a/kernel/signal.c b/kernel/signal.c index c0b01fe24bbd..f3f1f7a972fd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause) #endif -int sigsuspend(sigset_t *set) +static int sigsuspend(sigset_t *set) { current->saved_sigmask = current->blocked; set_current_blocked(set); From 7625b3a0007decf2b135cb47ca67abc78a7b1bc1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 20 Nov 2015 15:57:24 -0800 Subject: [PATCH 237/291] kernel/panic.c: turn off locks debug before releasing console lock Commit 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") introduced an unwanted bad unlock balance report when panic() is called directly and not from OOPS (e.g. from out_of_memory()). The difference is that in case of OOPS we disable locks debug in oops_enter() and on direct panic call nobody does that. Fixes: 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") Reported-by: kernel test robot Signed-off-by: Vitaly Kuznetsov Cc: HATAYAMA Daisuke Cc: Masami Hiramatsu Cc: Jiri Kosina Cc: Baoquan He Cc: Prarit Bhargava Cc: Xie XiuQi Cc: Seth Jennings Cc: "K. Y. Srinivasan" Cc: Jan Kara Cc: Petr Mladek Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 4579dbb7ed87..4b150bc0c6c1 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -152,8 +152,11 @@ void panic(const char *fmt, ...) * We may have ended up stopping the CPU holding the lock (in * smp_send_stop()) while still having some valuable data in the console * buffer. Try to acquire the lock then release it regardless of the - * result. The release will also print the buffers out. + * result. The release will also print the buffers out. Locks debug + * should be disabled to avoid reporting bad unlock balance when + * panic() is not being callled from OOPS. */ + debug_locks_off(); console_trylock(); console_unlock(); From 875fa6fbb8d8d5bbf8cc2fa7b340a716f50ee0c7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 20 Nov 2015 15:57:27 -0800 Subject: [PATCH 238/291] PM/OPP: add entry in MAINTAINERS Add entry for operating performance points into MAINTAINERS file. This will also allow get_maintainers to list OPP stakeholders properly. Signed-off-by: Viresh Kumar Acked-by: Stephen Boyd Cc: Rafael Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0f5fa0f978c2..bab1b03a19fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7903,6 +7903,18 @@ S: Maintained F: net/openvswitch/ F: include/uapi/linux/openvswitch.h +OPERATING PERFORMANCE POINTS (OPP) +M: Viresh Kumar +M: Nishanth Menon +M: Stephen Boyd +L: linux-pm@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git +F: drivers/base/power/opp/ +F: include/linux/pm_opp.h +F: Documentation/power/opp.txt +F: Documentation/devicetree/bindings/opp/ + OPL4 DRIVER M: Clemens Ladisch L: alsa-devel@alsa-project.org (moderated for non-subscribers) From 8f1eb48758aacf6c1ffce18179295adbf3bd7640 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 20 Nov 2015 15:57:30 -0800 Subject: [PATCH 239/291] ocfs2: fix umask ignored issue New created file's mode is not masked with umask, and this makes umask not work for ocfs2 volume. Fixes: 702e5bc ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Junxiao Bi Cc: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b48ac25d8a7..a03f6f433075 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -372,6 +372,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Nov 2015 15:57:32 -0800 Subject: [PATCH 240/291] mm: fix up sparse warning in gfpflags_allow_blocking sparse says: include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types) include/linux/gfp.h:274:26: expected bool include/linux/gfp.h:274:26: got restricted gfp_t ...add a forced cast to silence the warning. Signed-off-by: Jeff Layton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6523109e136d..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return gfp_flags & __GFP_DIRECT_RECLAIM; + return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM From a380a3c75529a5c42b78c0d64a46404f8cb0c0d1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Nov 2015 15:57:35 -0800 Subject: [PATCH 241/291] slub: create new ___slab_alloc function that can be called with irqs disabled Bulk alloc needs a function like that because it enables interrupts before calling __slab_alloc which promptly disables them again using the expensive local_irq_save(). Signed-off-by: Christoph Lameter Cc: Jesper Dangaard Brouer Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Alexander Duyck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 7cb4bf9ae320..2a952751bb50 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2295,23 +2295,15 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * And if we were unable to get a new slab from the partial slab lists then * we need to allocate a new slab. This is the slowest path since it involves * a call to the page allocator and the setup of a new slab. + * + * Version of __slab_alloc to use when we know that interrupts are + * already disabled (which is the case for bulk allocation). */ -static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { void *freelist; struct page *page; - unsigned long flags; - - local_irq_save(flags); -#ifdef CONFIG_PREEMPT - /* - * We may have been preempted and rescheduled on a different - * cpu before disabling interrupts. Need to reload cpu area - * pointer. - */ - c = this_cpu_ptr(s->cpu_slab); -#endif page = c->page; if (!page) @@ -2369,7 +2361,6 @@ load_freelist: VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); - local_irq_restore(flags); return freelist; new_slab: @@ -2386,7 +2377,6 @@ new_slab: if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); - local_irq_restore(flags); return NULL; } @@ -2402,10 +2392,34 @@ new_slab: deactivate_slab(s, page, get_freepointer(s, freelist)); c->page = NULL; c->freelist = NULL; - local_irq_restore(flags); return freelist; } +/* + * Another one that disabled interrupt and compensates for possible + * cpu changes by refetching the per cpu area pointer. + */ +static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) +{ + void *p; + unsigned long flags; + + local_irq_save(flags); +#ifdef CONFIG_PREEMPT + /* + * We may have been preempted and rescheduled on a different + * cpu before disabling interrupts. Need to reload cpu area + * pointer. + */ + c = this_cpu_ptr(s->cpu_slab); +#endif + + p = ___slab_alloc(s, gfpflags, node, addr, c); + local_irq_restore(flags); + return p; +} + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call From 87098373e244840e00bd1c93884c1d917411597e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 20 Nov 2015 15:57:38 -0800 Subject: [PATCH 242/291] slub: avoid irqoff/on in bulk allocation Use the new function that can do allocation while interrupts are disabled. Avoids irq on/off sequences. Signed-off-by: Christoph Lameter Cc: Jesper Dangaard Brouer Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Alexander Duyck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 2a952751bb50..23f9d8d26422 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2818,30 +2818,23 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { - local_irq_enable(); /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist */ - p[i] = __slab_alloc(s, flags, NUMA_NO_NODE, + p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_, c); - if (unlikely(!p[i])) { - __kmem_cache_free_bulk(s, i, p); - return false; - } - local_irq_disable(); + if (unlikely(!p[i])) + goto error; + c = this_cpu_ptr(s->cpu_slab); continue; /* goto for-loop */ } /* kmem_cache debug support */ s = slab_pre_alloc_hook(s, flags); - if (unlikely(!s)) { - __kmem_cache_free_bulk(s, i, p); - c->tid = next_tid(c->tid); - local_irq_enable(); - return false; - } + if (unlikely(!s)) + goto error; c->freelist = get_freepointer(s, object); p[i] = object; @@ -2861,6 +2854,11 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } return true; + +error: + __kmem_cache_free_bulk(s, i, p); + local_irq_enable(); + return false; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); From b4a64718797b84b64a6ddf3d4183c29c2e79ef1d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:41 -0800 Subject: [PATCH 243/291] slub: mark the dangling ifdef #else of CONFIG_SLUB_DEBUG The #ifdef of CONFIG_SLUB_DEBUG is located very far from the associated #else. For readability mark it with a comment. Signed-off-by: Jesper Dangaard Brouer Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Alexander Duyck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 23f9d8d26422..a0c1365f6426 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1204,7 +1204,7 @@ unsigned long kmem_cache_flags(unsigned long object_size, return flags; } -#else +#else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) {} From ee0c1a65cf95230d5eb3d9de94fd2ead9a428c67 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 11 Nov 2015 08:03:54 -0500 Subject: [PATCH 244/291] tty: Fix tty_send_xchar() lock order inversion The correct lock order is atomic_write_lock => termios_rwsem, as established by tty_write() => n_tty_write(). Fixes: c274f6ef1c666 ("tty: Hold termios_rwsem for tcflow(TCIxxx)") Reported-and-Tested-by: Dmitry Vyukov Cc: # v3.18+ Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 4 ++++ drivers/tty/tty_ioctl.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 0c41dbcb90b8..bcc8e1e8bb72 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1282,18 +1282,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch) int was_stopped = tty->stopped; if (tty->ops->send_xchar) { + down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); + up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, 0) < 0) return -ERESTARTSYS; + down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); + up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 9c5aebfe7053..1445dd39aa62 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -1147,16 +1147,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, spin_unlock_irq(&tty->flow_lock); break; case TCIOFF: - down_read(&tty->termios_rwsem); if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); - up_read(&tty->termios_rwsem); break; case TCION: - down_read(&tty->termios_rwsem); if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); - up_read(&tty->termios_rwsem); break; default: return -EINVAL; From 17a69219713b5063698038151fea9167b5b0c7b4 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 07:53:06 -0500 Subject: [PATCH 245/291] tty: Fix direct use of tty buffer work Recent abstraction of tty buffer work introduced api to manage tty input kworker; use it. Fixes: e176058f0de5 ("tty: Abstract tty buffer work") Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 5af8f1874c1a..629e3c865072 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -592,7 +592,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc) /* Restart the work queue in case no characters kick it off. Safe if already running */ - schedule_work(&tty->port->buf.work); + tty_buffer_restart_work(tty->port); tty_unlock(tty); return retval; From 83f7fa6c2976a9db9edf3abf7749176ca36fb4cc Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Sun, 15 Nov 2015 16:14:32 +0000 Subject: [PATCH 246/291] bcm63xx_uart: Use the device name when registering an interrupt Use the device name when registering an interrupt so that multiple ports don't all have the same interrupt name. Signed-off-by: Simon Arlott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/bcm63xx_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 681e0f3d5e0e..a1c0a89d9c7f 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -474,7 +474,7 @@ static int bcm_uart_startup(struct uart_port *port) /* register irq and enable rx interrupts */ ret = request_irq(port->irq, bcm_uart_interrupt, 0, - bcm_uart_type(port), port); + dev_name(port->dev), port); if (ret) return ret; bcm_uart_writel(port, UART_RX_INT_MASK, UART_IR_REG); From bec8bf61dc911ebe04a7997db2c44ed51d8ade9d Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 20 Oct 2015 09:55:21 -0400 Subject: [PATCH 247/291] serial: fsl_lpuart: Fix earlycon support Earlycon support for Freescale lpuart should only be enabled when console support is enabled. Fixes: 1d59b382f1c4 ("serial: fsl_lpuart: add earlycon support") Acked-by: Stefan Agner Signed-off-by: Peter Hurley Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 1aec4404062d..f38beb28e7ae 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1539,7 +1539,6 @@ config SERIAL_FSL_LPUART tristate "Freescale lpuart serial port support" depends on HAS_DMA select SERIAL_CORE - select SERIAL_EARLYCON help Support for the on-chip lpuart on some Freescale SOCs. @@ -1547,6 +1546,7 @@ config SERIAL_FSL_LPUART_CONSOLE bool "Console on Freescale lpuart serial port" depends on SERIAL_FSL_LPUART=y select SERIAL_CORE_CONSOLE + select SERIAL_EARLYCON help If you have enabled the lpuart serial port on the Freescale SoCs, you can make it the console by answering Y to this option. From f356d7a7ddb5ea545e81c84eecfdf1b5ab4647fc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 1 Nov 2015 18:32:56 -0800 Subject: [PATCH 248/291] serial: etraxfs-uart: Fix crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 7d8c70d8048c ("serial: mctrl-gpio: rename init function"), crisv32 either do not build or crash as follows. Unable to handle kernel NULL pointer dereference Linux 4.3.0-rc7-next-20151101 #1 Sun Nov 1 11:41:28 PST 2015 ... Call Trace: [] show_stack+0x0/0x9e [] printk+0x0/0x2c [] show_registers+0x14a/0x1c2 [] printk+0x0/0x2c [] die_if_kernel+0x7c/0x9e [] do_page_fault+0x32e/0x3e6 [] of_get_property+0x0/0x2c [] of_irq_parse_raw+0x12a/0x376 [] of_get_property+0x0/0x2c [] get_page_from_freelist+0x73e/0x856 [] of_get_property+0x0/0x2c [] d_mmu_refill+0x10a/0x112 [] devm_kmalloc+0x40/0x56 [] add_dr+0xc/0x1c [] devm_add_action+0x2/0x4e [] mctrl_gpio_init_noauto+0x1c/0x76 [] mctrl_gpio_init+0x22/0x110 The function call in the etraxfs-uart driver was not renamed, possibly due to interference with commit 7b9c5162c182 ("serial: etraxfs-uart: use mctrl_gpio helpers for handling modem signals"). Fixes: 7d8c70d8048c ("serial: mctrl-gpio: rename init function") Signed-off-by: Guenter Roeck Acked-by: Uwe Kleine-König Acked-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/etraxfs-uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/etraxfs-uart.c b/drivers/tty/serial/etraxfs-uart.c index 6813e316e9ff..2f80bc7e44fb 100644 --- a/drivers/tty/serial/etraxfs-uart.c +++ b/drivers/tty/serial/etraxfs-uart.c @@ -894,7 +894,7 @@ static int etraxfs_uart_probe(struct platform_device *pdev) up->regi_ser = of_iomap(np, 0); up->port.dev = &pdev->dev; - up->gpios = mctrl_gpio_init(&pdev->dev, 0); + up->gpios = mctrl_gpio_init_noauto(&pdev->dev, 0); if (IS_ERR(up->gpios)) return PTR_ERR(up->gpios); From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: [PATCH 249/291] tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data to audit/record is in the 'from' buffer (ie., the input read buffer). Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: stable # 4.1+ Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_audit.c | 2 +- include/linux/tty.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 13844261cd5f..ed776149261e 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty, { struct n_tty_data *ldata = tty->disc_data; - tty_audit_add_data(tty, to, n, ldata->icanon); + tty_audit_add_data(tty, from, n, ldata->icanon); return copy_to_user(to, from, n); } diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 90ca082935f6..3d245cd3d8e6 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. */ -void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon) { struct tty_audit_buf *buf; diff --git a/include/linux/tty.h b/include/linux/tty.h index 5b04b0a5375b..5e31f1b99037 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) From fa70045e9df47d2e7a698e5f7a0d21423a58cdaa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 12 Nov 2015 15:21:23 +0200 Subject: [PATCH 250/291] serial: 8250_mid: Add missing dependency 8250_mid uses rational_best_approximation() function, so the driver needs to select CONFIG_RATIONAL option. This fixes build error when CONFIG_RATIONAL is not enabled: drivers/built-in.o: In function `mid8250_set_termios': 8250_mid.c:(.text+0x10169a): undefined reference to `rational_best_approximation' Reported-by: Randy Dunlap Signed-off-by: Heikki Krogerus Acked-by: Andy Shevchenko Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index e6f5e12a2d83..6412f1455beb 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -373,6 +373,7 @@ config SERIAL_8250_MID depends on SERIAL_8250 && PCI select HSU_DMA if SERIAL_8250_DMA select HSU_DMA_PCI if X86_INTEL_MID + select RATIONAL help Selecting this option will enable handling of the extra features present on the UART found on Intel Medfield SOC and various other From bd63acf9e9a6b48e9c6c15f0f413f85a481cb5ef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Nov 2015 16:48:09 +0100 Subject: [PATCH 251/291] serial: export fsl8250_handle_irq fsl8250_handle_irq is now used by the of_serial driver, and that fails if it is a loadable module: ERROR: "fsl8250_handle_irq" [drivers/tty/serial/of_serial.ko] undefined! This exports the symbol to avoid randconfig errors. Signed-off-by: Arnd Bergmann Fixes: d43b54d269d2 ("serial: Enable Freescale 16550 workaround on arm") Cc: Scott Wood Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fsl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index c0533a57ec53..910bfee5a88b 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -60,3 +60,4 @@ int fsl8250_handle_irq(struct uart_port *port) spin_unlock_irqrestore(&up->port.lock, flags); return 1; } +EXPORT_SYMBOL_GPL(fsl8250_handle_irq); From b094f89ca42fbb8ce40174d5f85ca8430e499da6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Nov 2015 20:29:45 -0700 Subject: [PATCH 252/291] blk-mq: fix calling unplug callbacks with preempt disabled Liu reported that running certain parts of xfstests threw the following error: BUG: sleeping function called from invalid context at mm/page_alloc.c:3190 in_atomic(): 1, irqs_disabled(): 0, pid: 6, name: kworker/u16:0 3 locks held by kworker/u16:0/6: #0: ("writeback"){++++.+}, at: [] process_one_work+0x173/0x730 #1: ((&(&wb->dwork)->work)){+.+.+.}, at: [] process_one_work+0x173/0x730 #2: (&type->s_umount_key#44){+++++.}, at: [] trylock_super+0x25/0x60 CPU: 5 PID: 6 Comm: kworker/u16:0 Tainted: G OE 4.3.0+ #3 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 Workqueue: writeback wb_workfn (flush-btrfs-108) ffffffff81a3abab ffff88042e282ba8 ffffffff8130191b ffffffff81a3abab 0000000000000c76 ffff88042e282ba8 ffff88042e27c180 ffff88042e282bd8 ffffffff8108ed95 ffff880400000004 0000000000000000 0000000000000c76 Call Trace: [] dump_stack+0x4f/0x74 [] ___might_sleep+0x185/0x240 [] __might_sleep+0x52/0x90 [] __alloc_pages_nodemask+0x268/0x410 [] ? sched_clock_local+0x1c/0x90 [] ? local_clock+0x21/0x40 [] ? __lock_release+0x420/0x510 [] ? __lock_acquired+0x16c/0x3c0 [] alloc_pages_current+0xc5/0x210 [] ? rbio_is_full+0x55/0x70 [btrfs] [] ? mark_held_locks+0x78/0xa0 [] ? _raw_spin_unlock_irqrestore+0x40/0x60 [] full_stripe_write+0x5a/0xc0 [btrfs] [] __raid56_parity_write+0x39/0x60 [btrfs] [] run_plug+0x11b/0x140 [btrfs] [] btrfs_raid_unplug+0x23/0x70 [btrfs] [] blk_flush_plug_list+0x82/0x1f0 [] blk_sq_make_request+0x1f9/0x740 [] ? generic_make_request_checks+0x222/0x7c0 [] ? blk_queue_enter+0x124/0x310 [] ? blk_queue_enter+0x92/0x310 [] generic_make_request+0x172/0x2c0 [] ? generic_make_request+0x164/0x2c0 [] submit_bio+0x70/0x140 [] ? rbio_add_io_page+0x99/0x150 [btrfs] [] finish_rmw+0x4d9/0x600 [btrfs] [] full_stripe_write+0x9c/0xc0 [btrfs] [] raid56_parity_write+0xef/0x160 [btrfs] [] btrfs_map_bio+0xe3/0x2d0 [btrfs] [] btrfs_submit_bio_hook+0x8d/0x1d0 [btrfs] [] submit_one_bio+0x74/0xb0 [btrfs] [] submit_extent_page+0xe5/0x1c0 [btrfs] [] __extent_writepage_io+0x408/0x4c0 [btrfs] [] ? alloc_dummy_extent_buffer+0x140/0x140 [btrfs] [] __extent_writepage+0x218/0x3a0 [btrfs] [] ? mark_held_locks+0x78/0xa0 [] extent_write_cache_pages.clone.0+0x2f9/0x400 [btrfs] [] extent_writepages+0x52/0x70 [btrfs] [] ? btrfs_set_inode_index+0x70/0x70 [btrfs] [] btrfs_writepages+0x27/0x30 [btrfs] [] do_writepages+0x23/0x40 [] __writeback_single_inode+0x89/0x4d0 [] ? writeback_sb_inodes+0x260/0x480 [] ? writeback_sb_inodes+0x260/0x480 [] ? writeback_sb_inodes+0x15f/0x480 [] writeback_sb_inodes+0x2d2/0x480 [] ? down_read_trylock+0x57/0x60 [] ? trylock_super+0x25/0x60 [] ? rcu_read_lock_sched_held+0x4f/0x90 [] __writeback_inodes_wb+0x8c/0xc0 [] wb_writeback+0x2b5/0x500 [] ? mark_held_locks+0x78/0xa0 [] ? __local_bh_enable_ip+0x68/0xc0 [] ? wb_do_writeback+0x62/0x310 [] wb_do_writeback+0xc1/0x310 [] ? set_worker_desc+0x79/0x90 [] wb_workfn+0x92/0x330 [] process_one_work+0x223/0x730 [] ? process_one_work+0x173/0x730 [] ? worker_thread+0x18f/0x430 [] worker_thread+0x11d/0x430 [] ? maybe_create_worker+0xf0/0xf0 [] ? maybe_create_worker+0xf0/0xf0 [] kthread+0xef/0x110 [] ? schedule_tail+0x1e/0xd0 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x3f/0x70 [] ? __init_kthread_worker+0x70/0x70 The issue is that we've got the software context pinned while calling blk_flush_plug_list(), which flushes callbacks that are allowed to sleep. btrfs and raid has such callbacks. Flip the checks around a bit, so we can enable preempt a bit earlier and flush plugs without having preempt disabled. This only affects blk-mq driven devices, and only those that register a single queue. Reported-by: Liu Bo Tested-by: Liu Bo Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ae09de62f19..6d6f8feb48c0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1291,15 +1291,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); /* - * we do limited pluging. If bio can be merged, do merge. + * We do limited pluging. If the bio can be merged, do that. * Otherwise the existing request in the plug list will be * issued. So the plug list will have one request at most */ if (plug) { /* * The plug list might get flushed before this. If that - * happens, same_queue_rq is invalid and plug list is empty - **/ + * happens, same_queue_rq is invalid and plug list is + * empty + */ if (same_queue_rq && !list_empty(&plug->mq_list)) { old_rq = same_queue_rq; list_del_init(&old_rq->queuelist); @@ -1380,12 +1381,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); if (!request_count) trace_block_plug(q); - else if (request_count >= BLK_MAX_REQUEST_COUNT) { + + blk_mq_put_ctx(data.ctx); + + if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); trace_block_plug(q); } + list_add_tail(&rq->queuelist, &plug->mq_list); - blk_mq_put_ctx(data.ctx); return cookie; } From dcbf0d299c00ed4f82ea8d6e359ad88a5182f9b8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Nov 2015 12:14:14 +0100 Subject: [PATCH 253/291] parisc: Drop unused MADV_xxxK_PAGES flags from asm/mman.h Drop the MADV_xxK_PAGES flags, which were never used and were from a proposed API which was never integrated into the generic Linux kernel code. Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/mman.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 294d251ca7b2..2ae13ce592e8 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -46,16 +46,6 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ -/* The range 12-64 is reserved for page size specification. */ -#define MADV_4K_PAGES 12 /* Use 4K pages */ -#define MADV_16K_PAGES 14 /* Use 16K pages */ -#define MADV_64K_PAGES 16 /* Use 64K pages */ -#define MADV_256K_PAGES 18 /* Use 256K pages */ -#define MADV_1M_PAGES 20 /* Use 1 Megabyte pages */ -#define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ -#define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ -#define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ - #define MADV_MERGEABLE 65 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ From 1f25ad26d65b3740f44d6e03edcd34a5f7b58850 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 15:46:52 +0100 Subject: [PATCH 254/291] parisc: Add defines for Huge page support Huge pages on parisc will have the same size as one pmd table, which is on a 64bit kernel 2MB on a kernel with 4K kernel page sizes, and on a 32bit kernel 4MB when used with 4K kernel pages. Since parisc does not physically supports 2MB huge page sizes, emulate it with two consecutive 1MB page sizes instead. Keeping the same huge page size as one pmd will allow us to add transparent huge page support later on. Bit 21 in the pte flags was unused and will now be used to mark a page as huge page (_PAGE_HPAGE_BIT). Signed-off-by: Helge Deller --- arch/parisc/include/asm/page.h | 13 ++++++++++++- arch/parisc/include/asm/pgtable.h | 20 +++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 60d5d174dfe4..80e742a1c162 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -145,11 +145,22 @@ extern int npmem_ranges; #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_HUGETLB_PAGE -#define HPAGE_SHIFT 22 /* 4MB (is this fixed?) */ +#define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) +# define REAL_HPAGE_SHIFT 20 /* 20 = 1MB */ +# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_1M +#elif !defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) +# define REAL_HPAGE_SHIFT 22 /* 22 = 4MB */ +# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4M +#else +# define REAL_HPAGE_SHIFT 24 /* 24 = 16MB */ +# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16M #endif +#endif /* CONFIG_HUGETLB_PAGE */ #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index f93c4a4e6580..638317ae276e 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -167,7 +167,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define _PAGE_NO_CACHE_BIT 24 /* (0x080) Uncached Page (U bit) */ #define _PAGE_ACCESSED_BIT 23 /* (0x100) Software: Page Accessed */ #define _PAGE_PRESENT_BIT 22 /* (0x200) Software: translation valid */ -/* bit 21 was formerly the FLUSH bit but is now unused */ +#define _PAGE_HPAGE_BIT 21 /* (0x400) Software: Huge Page */ #define _PAGE_USER_BIT 20 /* (0x800) Software: User accessible page */ /* N.B. The bits are defined in terms of a 32 bit word above, so the */ @@ -194,6 +194,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT)) #define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT)) #define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) +#define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) @@ -217,7 +218,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define PxD_FLAG_VALID (1 << xlate_pabit(_PxD_VALID_BIT)) #define PxD_FLAG_MASK (0xf) #define PxD_FLAG_SHIFT (4) -#define PxD_VALUE_SHIFT (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */ +#define PxD_VALUE_SHIFT (PFN_PTE_SHIFT-PxD_FLAG_SHIFT) #ifndef __ASSEMBLY__ @@ -362,6 +363,18 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; ret static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +/* + * Huge pte definitions. + */ +#ifdef CONFIG_HUGETLB_PAGE +#define pte_huge(pte) (pte_val(pte) & _PAGE_HUGE) +#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_HUGE)) +#else +#define pte_huge(pte) (0) +#define pte_mkhuge(pte) (pte) +#endif + + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -410,8 +423,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* Find an entry in the second-level page table.. */ #if CONFIG_PGTABLE_LEVELS == 3 +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address)) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif From 4182d0cdf853fb044b969318289ae9f451f69c86 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 10:50:01 +0100 Subject: [PATCH 255/291] parisc: Initialize the fault vector earlier in the boot process. A fault vector on parisc needs to be 2K aligned. Furthermore the checksum of the fault vector needs to sum up to 0 which is being calculated and written at runtime. Up to now we aligned both PA20 and PA11 fault vectors on the same 4K page in order to easily write the checksum after having mapped the kernel read-only (by mapping this page only as read-write). But when we want to map the kernel text and data on huge pages this makes things harder. So, simplify it by aligning both fault vectors on 2K boundries and write the checksum before we map the page read-only. Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 11 +++-------- arch/parisc/kernel/setup.c | 3 +++ arch/parisc/kernel/traps.c | 35 +++++++++++++++-------------------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index c5ef4081b01d..b2fdc44da0d5 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -646,17 +646,12 @@ /* - * Align fault_vector_20 on 4K boundary so that both - * fault_vector_11 and fault_vector_20 are on the - * same page. This is only necessary as long as we - * write protect the kernel text, which we may stop - * doing once we use large page translations to cover - * the static part of the kernel address space. + * Fault_vectors are architecturally required to be aligned on a 2K + * boundary */ .text - - .align 4096 + .align 2048 ENTRY(fault_vector_20) /* First vector is invalid (0) */ diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 72a3c658ad7b..f097762d3922 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -377,6 +377,7 @@ arch_initcall(parisc_init); void start_parisc(void) { extern void start_kernel(void); + extern void early_trap_init(void); int ret, cpunum; struct pdc_coproc_cfg coproc_cfg; @@ -397,6 +398,8 @@ void start_parisc(void) panic("must have an fpu to boot linux"); } + early_trap_init(); /* initialize checksum of fault_vector */ + start_kernel(); // not reached } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b99b39f1da02..553b09855cfd 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -807,7 +807,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } -int __init check_ivt(void *iva) +void __init initialize_ivt(const void *iva) { extern u32 os_hpmc_size; extern const u32 os_hpmc[]; @@ -818,8 +818,8 @@ int __init check_ivt(void *iva) u32 *hpmcp; u32 length; - if (strcmp((char *)iva, "cows can fly")) - return -1; + if (strcmp((const char *)iva, "cows can fly")) + panic("IVT invalid"); ivap = (u32 *)iva; @@ -839,28 +839,23 @@ int __init check_ivt(void *iva) check += ivap[i]; ivap[5] = -check; - - return 0; } + +/* early_trap_init() is called before we set up kernel mappings and + * write-protect the kernel */ +void __init early_trap_init(void) +{ + extern const void fault_vector_20; + #ifndef CONFIG_64BIT -extern const void fault_vector_11; + extern const void fault_vector_11; + initialize_ivt(&fault_vector_11); #endif -extern const void fault_vector_20; + + initialize_ivt(&fault_vector_20); +} void __init trap_init(void) { - void *iva; - - if (boot_cpu_data.cpu_type >= pcxu) - iva = (void *) &fault_vector_20; - else -#ifdef CONFIG_64BIT - panic("Can't boot 64-bit OS on PA1.1 processor!"); -#else - iva = (void *) &fault_vector_11; -#endif - - if (check_ivt(iva)) - panic("IVT invalid"); } From 332b42e4eb6e955f3be0bbbf1f272aa943954d98 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 11:17:27 +0100 Subject: [PATCH 256/291] parisc: Increase initial kernel mapping to 32MB on 64bit kernel For the 64bit kernel the initially 16 MB kernel memory might become too small if you build a kernel with many modules built-in and with kernel text and data areas mapped on huge pages. This patch increases the initial mapping to 32MB for 64bit kernels and keeps 16MB for 32bit kernels. Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgtable.h | 6 +++++- arch/parisc/kernel/head.S | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 638317ae276e..d8534f95915a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -83,7 +83,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) /* This is the size of the initially mapped kernel memory */ -#define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ +#ifdef CONFIG_64BIT +#define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */ +#else +#define KERNEL_INITIAL_ORDER 24 /* 1<<24 = 16MB */ +#endif #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) #if CONFIG_PGTABLE_LEVELS == 3 diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index e7d64527aff9..75aa0db9f69e 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -69,7 +69,7 @@ $bss_loop: stw,ma %arg2,4(%r1) stw,ma %arg3,4(%r1) - /* Initialize startup VM. Just map first 8/16 MB of memory */ + /* Initialize startup VM. Just map first 16/32 MB of memory */ load32 PA(swapper_pg_dir),%r4 mtctl %r4,%cr24 /* Initialize kernel root pointer */ mtctl %r4,%cr25 /* Initialize user root pointer */ @@ -107,7 +107,7 @@ $bss_loop: /* Now initialize the PTEs themselves. We use RWX for * everything ... it will get remapped correctly later */ ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ - ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ + load32 (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ load32 PA(pg0),%r1 $pgt_fill_loop: From 337685e556c6f080bf4775950e3b9493852715f8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Nov 2015 11:22:32 +0100 Subject: [PATCH 257/291] parisc: Use long branch to do_syscall_trace_exit Use the 22bit instead of the 17bit branch instruction on a 64bit kernel to reach the do_syscall_trace_exit function from the gateway page. A huge page enabled kernel may need the additional branch distance bits. Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 0b8d26d3ba43..3fbd7252a4b2 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -369,7 +369,7 @@ tracesys_exit: ldo -16(%r30),%r29 /* Reference param save area */ #endif ldo TASK_REGS(%r1),%r26 - bl do_syscall_trace_exit,%r2 + BL do_syscall_trace_exit,%r2 STREG %r28,TASK_PT_GR28(%r1) /* save return value now */ ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ LDREG TI_TASK(%r1), %r1 @@ -390,7 +390,7 @@ tracesys_sigexit: #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif - bl do_syscall_trace_exit,%r2 + BL do_syscall_trace_exit,%r2 ldo TASK_REGS(%r1),%r26 ldil L%syscall_exit_rfi,%r1 From 736d2169338a50c8814efc186b5423aee43b0c68 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Nov 2015 00:07:06 +0100 Subject: [PATCH 258/291] parisc: Add Huge Page and HUGETLBFS support This patch adds huge page support to allow userspace to allocate huge pages and to use hugetlbfs filesystem on 32- and 64-bit Linux kernels. A later patch will add kernel support to map kernel text and data on huge pages. The only requirement is, that the kernel needs to be compiled for a PA8X00 CPU (PA2.0 architecture). Older PA1.X CPUs do not support variable page sizes. 64bit Kernels are compiled for PA2.0 by default. Technically on parisc multiple physical huge pages may be needed to emulate standard 2MB huge pages. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 3 + arch/parisc/include/asm/hugetlb.h | 85 ++++++++++++++++ arch/parisc/kernel/entry.S | 45 ++++++--- arch/parisc/kernel/setup.c | 11 +- arch/parisc/mm/Makefile | 1 + arch/parisc/mm/hugetlbpage.c | 161 ++++++++++++++++++++++++++++++ 6 files changed, 291 insertions(+), 15 deletions(-) create mode 100644 arch/parisc/include/asm/hugetlb.h create mode 100644 arch/parisc/mm/hugetlbpage.c diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index c36546959e86..729f89163bc3 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -108,6 +108,9 @@ config PGTABLE_LEVELS default 3 if 64BIT && PARISC_PAGE_SIZE_4KB default 2 +config SYS_SUPPORTS_HUGETLBFS + def_bool y if PA20 + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h new file mode 100644 index 000000000000..7d56a9ccb752 --- /dev/null +++ b/arch/parisc/include/asm/hugetlb.h @@ -0,0 +1,85 @@ +#ifndef _ASM_PARISC64_HUGETLB_H +#define _ASM_PARISC64_HUGETLB_H + +#include +#include + + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) { + return 0; +} + +/* + * If the arch doesn't supply something else, assume that hugepage + * size aligned regions are ok without further preparation. + */ +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t old_pte = *ptep; + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + int changed = !pte_same(*ptep, pte); + if (changed) { + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + flush_tlb_page(vma, addr); + } + return changed; +} + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#endif /* _ASM_PARISC64_HUGETLB_H */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index b2fdc44da0d5..623496c11756 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -502,21 +502,38 @@ STREG \pte,0(\ptp) .endm + /* We have (depending on the page size): + * - 38 to 52-bit Physical Page Number + * - 12 to 26-bit page offset + */ /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) * to a CPU TLB 4k PFN (4k => 12 bits to shift) */ - #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) + #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) + #define PAGE_ADD_HUGE_SHIFT (REAL_HPAGE_SHIFT-12) /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ - .macro convert_for_tlb_insert20 pte + .macro convert_for_tlb_insert20 pte,tmp +#ifdef CONFIG_HUGETLB_PAGE + copy \pte,\tmp + extrd,u \tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ + 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte + + depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ + (63-58)+PAGE_ADD_SHIFT,\pte + extrd,u,*= \tmp,_PAGE_HPAGE_BIT+32,1,%r0 + depdi _HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\ + (63-58)+PAGE_ADD_HUGE_SHIFT,\pte +#else /* Huge pages disabled */ extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_SHIFT,\pte +#endif .endm /* Convert the pte and prot to tlb insertion values. How * this happens is quite subtle, read below */ - .macro make_insert_tlb spc,pte,prot + .macro make_insert_tlb spc,pte,prot,tmp space_to_prot \spc \prot /* create prot id from space */ /* The following is the real subtlety. This is depositing * T <-> _PAGE_REFTRAP @@ -553,7 +570,7 @@ depdi 1,12,1,\prot /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ - convert_for_tlb_insert20 \pte + convert_for_tlb_insert20 \pte \tmp .endm /* Identical macro to make_insert_tlb above, except it @@ -1142,7 +1159,7 @@ dtlb_miss_20w: tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 idtlbt pte,prot @@ -1168,7 +1185,7 @@ nadtlb_miss_20w: tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 idtlbt pte,prot @@ -1262,7 +1279,7 @@ dtlb_miss_20: tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 f_extend pte,t1 @@ -1290,7 +1307,7 @@ nadtlb_miss_20: tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 f_extend pte,t1 @@ -1399,7 +1416,7 @@ itlb_miss_20w: tlb_lock spc,ptp,pte,t0,t1,itlb_fault update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 iitlbt pte,prot @@ -1423,7 +1440,7 @@ naitlb_miss_20w: tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 iitlbt pte,prot @@ -1509,7 +1526,7 @@ itlb_miss_20: tlb_lock spc,ptp,pte,t0,t1,itlb_fault update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 f_extend pte,t1 @@ -1529,7 +1546,7 @@ naitlb_miss_20: tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 update_accessed ptp,pte,t0,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 f_extend pte,t1 @@ -1561,7 +1578,7 @@ dbit_trap_20w: tlb_lock spc,ptp,pte,t0,t1,dbit_fault update_dirty ptp,pte,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 idtlbt pte,prot @@ -1605,7 +1622,7 @@ dbit_trap_20: tlb_lock spc,ptp,pte,t0,t1,dbit_fault update_dirty ptp,pte,t1 - make_insert_tlb spc,pte,prot + make_insert_tlb spc,pte,prot,t1 f_extend pte,t1 diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index f097762d3922..f7ea626e29c9 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -130,7 +130,16 @@ void __init setup_arch(char **cmdline_p) printk(KERN_INFO "The 32-bit Kernel has started...\n"); #endif - printk(KERN_INFO "Default page size is %dKB.\n", (int)(PAGE_SIZE / 1024)); + printk(KERN_INFO "Kernel default page size is %d KB. Huge pages ", + (int)(PAGE_SIZE / 1024)); +#ifdef CONFIG_HUGETLB_PAGE + printk(KERN_CONT "enabled with %d MB physical and %d MB virtual size", + 1 << (REAL_HPAGE_SHIFT - 20), 1 << (HPAGE_SHIFT - 20)); +#else + printk(KERN_CONT "disabled"); +#endif + printk(KERN_CONT ".\n"); + pdc_console_init(); diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile index 758ceefb373a..134393de69d2 100644 --- a/arch/parisc/mm/Makefile +++ b/arch/parisc/mm/Makefile @@ -3,3 +3,4 @@ # obj-y := init.o fault.o ioremap.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c new file mode 100644 index 000000000000..f6fdc77a72bd --- /dev/null +++ b/arch/parisc/mm/hugetlbpage.c @@ -0,0 +1,161 @@ +/* + * PARISC64 Huge TLB page support. + * + * This parisc implementation is heavily based on the SPARC and x86 code. + * + * Copyright (C) 2015 Helge Deller + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + + if (addr) + addr = ALIGN(addr, huge_page_size(h)); + + /* we need to make sure the colouring is OK */ + return arch_get_unmapped_area(file, addr, len, pgoff, flags); +} + + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + /* We must align the address, because our caller will run + * set_huge_pte_at() on whatever we return, which writes out + * all of the sub-ptes for the hugepage range. So we have + * to give it the first such sub-pte. + */ + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (pmd) + pte = pte_alloc_map(mm, NULL, pmd, addr); + } + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } + } + return pte; +} + +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ +static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long addr) +{ + int i; + + /* We may use multiple physical huge pages (e.g. 2x1 MB) to emulate + * Linux standard huge pages (e.g. 2 MB) */ + BUILD_BUG_ON(REAL_HPAGE_SHIFT > HPAGE_SHIFT); + + addr &= HPAGE_MASK; + addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT; + + for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) { + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); + addr += (1UL << REAL_HPAGE_SHIFT); + } +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + unsigned long addr_start; + int i; + + addr &= HPAGE_MASK; + addr_start = addr; + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + /* Directly write pte entry. We could call set_pte_at(mm, addr, ptep, entry) + * instead, but then we get double locking on pa_tlb_lock. */ + *ptep = entry; + ptep++; + + /* Drop the PAGE_SIZE/non-huge tlb entry */ + purge_tlb_entries(mm, addr); + + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } + + purge_tlb_entries_huge(mm, addr_start); +} + + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t entry; + + entry = *ptep; + set_huge_pte_at(mm, addr, ptep, __pte(0)); + + return entry; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} From 41b85a1163386f8328ad570f383973cb3975d2fa Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Nov 2015 00:07:44 +0100 Subject: [PATCH 259/291] parisc: Map kernel text and data on huge pages Adjust the linker script and map_pages() to map kernel text and data on physical 1MB huge/large pages. Signed-off-by: Helge Deller --- arch/parisc/kernel/asm-offsets.c | 8 +++++++ arch/parisc/kernel/vmlinux.lds.S | 9 ++++--- arch/parisc/mm/init.c | 40 ++++++++++++++------------------ 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 59001cea13f9..d2f62570a7b1 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -289,6 +289,14 @@ int main(void) DEFINE(ASM_PTE_ENTRY_SIZE, PTE_ENTRY_SIZE); DEFINE(ASM_PFN_PTE_SHIFT, PFN_PTE_SHIFT); DEFINE(ASM_PT_INITIAL, PT_INITIAL); + BLANK(); + /* HUGEPAGE_SIZE is only used in vmlinux.lds.S to align kernel text + * and kernel data on physical huge pages */ +#ifdef CONFIG_HUGETLB_PAGE + DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT); +#else + DEFINE(HUGEPAGE_SIZE, PAGE_SIZE); +#endif BLANK(); DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 0dacc5ca555a..308f29081d46 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -60,7 +60,7 @@ SECTIONS EXIT_DATA } PERCPU_SECTION(8) - . = ALIGN(PAGE_SIZE); + . = ALIGN(HUGEPAGE_SIZE); __init_end = .; /* freed after init ends here */ @@ -116,7 +116,7 @@ SECTIONS * that we can properly leave these * as writable */ - . = ALIGN(PAGE_SIZE); + . = ALIGN(HUGEPAGE_SIZE); data_start = .; EXCEPTION_TABLE(8) @@ -135,8 +135,11 @@ SECTIONS _edata = .; /* BSS */ - BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 8) + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE) + /* bootmap is allocated in setup_bootmem() directly behind bss. */ + + . = ALIGN(HUGEPAGE_SIZE); _end = . ; STABS_DEBUG diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index c229427fa546..ac90df1119bd 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -407,15 +407,11 @@ static void __init map_pages(unsigned long start_vaddr, unsigned long vaddr; unsigned long ro_start; unsigned long ro_end; - unsigned long fv_addr; - unsigned long gw_addr; - extern const unsigned long fault_vector_20; - extern void * const linux_gateway_page; + unsigned long kernel_end; ro_start = __pa((unsigned long)_text); ro_end = __pa((unsigned long)&data_start); - fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; - gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; + kernel_end = __pa((unsigned long)&_end); end_paddr = start_paddr + size; @@ -473,24 +469,25 @@ static void __init map_pages(unsigned long start_vaddr, for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { pte_t pte; - /* - * Map the fault vector writable so we can - * write the HPMC checksum. - */ if (force) pte = __mk_pte(address, pgprot); - else if (parisc_text_address(vaddr) && - address != fv_addr) + else if (parisc_text_address(vaddr)) { pte = __mk_pte(address, PAGE_KERNEL_EXEC); + if (address >= ro_start && address < kernel_end) + pte = pte_mkhuge(pte); + } else #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) - if (address >= ro_start && address < ro_end - && address != fv_addr - && address != gw_addr) - pte = __mk_pte(address, PAGE_KERNEL_RO); - else + if (address >= ro_start && address < ro_end) { + pte = __mk_pte(address, PAGE_KERNEL_EXEC); + pte = pte_mkhuge(pte); + } else #endif + { pte = __mk_pte(address, pgprot); + if (address >= ro_start && address < kernel_end) + pte = pte_mkhuge(pte); + } if (address >= end_paddr) { if (force) @@ -534,15 +531,12 @@ void free_initmem(void) /* force the kernel to see the new TLB entries */ __flush_tlb_range(0, init_begin, init_end); - /* Attempt to catch anyone trying to execute code here - * by filling the page with BRK insns. - */ - memset((void *)init_begin, 0x00, init_end - init_begin); + /* finally dump all the instructions which were cached, since the * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - free_initmem_default(-1); + free_initmem_default(POISON_FREE_INITMEM); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); @@ -712,8 +706,8 @@ static void __init pagetable_init(void) unsigned long size; start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT; - end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT); size = pmem_ranges[range].pages << PAGE_SHIFT; + end_paddr = start_paddr + size; map_pages((unsigned long)__va(start_paddr), start_paddr, size, PAGE_KERNEL, 0); From 81084651d73737988355f167065fab8a73574db1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:46 -0800 Subject: [PATCH 260/291] slub: support for bulk free with SLUB freelists Make it possible to free a freelist with several objects by adjusting API of slab_free() and __slab_free() to have head, tail and an objects counter (cnt). Tail being NULL indicate single object free of head object. This allow compiler inline constant propagation in slab_free() and slab_free_freelist_hook() to avoid adding any overhead in case of single object free. This allows a freelist with several objects (all within the same slab-page) to be free'ed using a single locked cmpxchg_double in __slab_free() and with an unlocked cmpxchg_double in slab_free(). Object debugging on the free path is also extended to handle these freelists. When CONFIG_SLUB_DEBUG is enabled it will also detect if objects don't belong to the same slab-page. These changes are needed for the next patch to bulk free the detached freelists it introduces and constructs. Micro benchmarking showed no performance reduction due to this change, when debugging is turned off (compiled with CONFIG_SLUB_DEBUG). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexander Duyck Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 85 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a0c1365f6426..d52f0d0ab712 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1065,11 +1065,15 @@ bad: return 0; } +/* Supports checking bulk free of a constructed freelist */ static noinline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + void *object = head; + int cnt = 0; spin_lock_irqsave(&n->list_lock, *flags); slab_lock(page); @@ -1077,6 +1081,9 @@ static noinline struct kmem_cache_node *free_debug_processing( if (!check_slab(s, page)) goto fail; +next_object: + cnt++; + if (!check_valid_pointer(s, page, object)) { slab_err(s, page, "Invalid object pointer 0x%p", object); goto fail; @@ -1107,8 +1114,19 @@ static noinline struct kmem_cache_node *free_debug_processing( if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); + /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ init_object(s, object, SLUB_RED_INACTIVE); + + /* Reached end of constructed freelist yet? */ + if (object != tail) { + object = get_freepointer(s, object); + goto next_object; + } out: + if (cnt != bulk_cnt) + slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", + bulk_cnt, cnt); + slab_unlock(page); /* * Keep node_lock to preserve integrity @@ -1212,7 +1230,8 @@ static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } static inline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { return NULL; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) @@ -1308,6 +1327,29 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) kasan_slab_free(s, x); } +static inline void slab_free_freelist_hook(struct kmem_cache *s, + void *head, void *tail) +{ +/* + * Compiler cannot detect this function can be removed if slab_free_hook() + * evaluates to nothing. Thus, catch all relevant config debug options here. + */ +#if defined(CONFIG_KMEMCHECK) || \ + defined(CONFIG_LOCKDEP) || \ + defined(CONFIG_DEBUG_KMEMLEAK) || \ + defined(CONFIG_DEBUG_OBJECTS_FREE) || \ + defined(CONFIG_KASAN) + + void *object = head; + void *tail_obj = tail ? : head; + + do { + slab_free_hook(s, object); + } while ((object != tail_obj) && + (object = get_freepointer(s, object))); +#endif +} + static void setup_object(struct kmem_cache *s, struct page *page, void *object) { @@ -2583,10 +2625,11 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr) + void *head, void *tail, int cnt, + unsigned long addr) + { void *prior; - void **object = (void *)x; int was_frozen; struct page new; unsigned long counters; @@ -2596,7 +2639,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, stat(s, FREE_SLOWPATH); if (kmem_cache_debug(s) && - !(n = free_debug_processing(s, page, x, addr, &flags))) + !(n = free_debug_processing(s, page, head, tail, cnt, + addr, &flags))) return; do { @@ -2606,10 +2650,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } prior = page->freelist; counters = page->counters; - set_freepointer(s, object, prior); + set_freepointer(s, tail, prior); new.counters = counters; was_frozen = new.frozen; - new.inuse--; + new.inuse -= cnt; if ((!new.inuse || !prior) && !was_frozen) { if (kmem_cache_has_cpu_partial(s) && !prior) { @@ -2640,7 +2684,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } while (!cmpxchg_double_slab(s, page, prior, counters, - object, new.counters, + head, new.counters, "__slab_free")); if (likely(!n)) { @@ -2705,15 +2749,20 @@ slab_empty: * * If fastpath is not possible then fall back to __slab_free where we deal * with all sorts of special processing. + * + * Bulk free of a freelist with several objects (all pointing to the + * same page) possible by specifying head and tail ptr, plus objects + * count (cnt). Bulk free indicated by tail pointer being set. */ -static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, unsigned long addr) +static __always_inline void slab_free(struct kmem_cache *s, struct page *page, + void *head, void *tail, int cnt, + unsigned long addr) { - void **object = (void *)x; + void *tail_obj = tail ? : head; struct kmem_cache_cpu *c; unsigned long tid; - slab_free_hook(s, x); + slab_free_freelist_hook(s, head, tail); redo: /* @@ -2732,19 +2781,19 @@ redo: barrier(); if (likely(page == c->page)) { - set_freepointer(s, object, c->freelist); + set_freepointer(s, tail_obj, c->freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, - object, next_tid(tid)))) { + head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); goto redo; } stat(s, FREE_FASTPATH); } else - __slab_free(s, page, x, addr); + __slab_free(s, page, head, tail_obj, cnt, addr); } @@ -2753,7 +2802,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) s = cache_from_obj(s, x); if (!s) return; - slab_free(s, virt_to_head_page(x), x, _RET_IP_); + slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); @@ -2788,7 +2837,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) c->tid = next_tid(c->tid); local_irq_enable(); /* Slowpath: overhead locked cmpxchg_double_slab */ - __slab_free(s, page, object, _RET_IP_); + __slab_free(s, page, object, object, 1, _RET_IP_); local_irq_disable(); c = this_cpu_ptr(s->cpu_slab); } @@ -3523,7 +3572,7 @@ void kfree(const void *x) __free_kmem_pages(page, compound_order(page)); return; } - slab_free(page->slab_cache, page, object, _RET_IP_); + slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kfree); From d0ecd894e3d5f768a84403b34019c4a7daa05882 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:49 -0800 Subject: [PATCH 261/291] slub: optimize bulk slowpath free by detached freelist This change focus on improving the speed of object freeing in the "slowpath" of kmem_cache_free_bulk. The calls slab_free (fastpath) and __slab_free (slowpath) have been extended with support for bulk free, which amortize the overhead of the (locked) cmpxchg_double. To use the new bulking feature, we build what I call a detached freelist. The detached freelist takes advantage of three properties: 1) the free function call owns the object that is about to be freed, thus writing into this memory is synchronization-free. 2) many freelist's can co-exist side-by-side in the same slab-page each with a separate head pointer. 3) it is the visibility of the head pointer that needs synchronization. Given these properties, the brilliant part is that the detached freelist can be constructed without any need for synchronization. The freelist is constructed directly in the page objects, without any synchronization needed. The detached freelist is allocated on the stack of the function call kmem_cache_free_bulk. Thus, the freelist head pointer is not visible to other CPUs. All objects in a SLUB freelist must belong to the same slab-page. Thus, constructing the detached freelist is about matching objects that belong to the same slab-page. The bulk free array is scanned is a progressive manor with a limited look-ahead facility. Kmem debug support is handled in call of slab_free(). Notice kmem_cache_free_bulk no longer need to disable IRQs. This only slowed down single free bulk with approx 3 cycles. Performance data: Benchmarked[1] obj size 256 bytes on CPU i7-4790K @ 4.00GHz SLUB fastpath single object quick reuse: 47 cycles(tsc) 11.931 ns To get stable and comparable numbers, the kernel have been booted with "slab_merge" (this also improve performance for larger bulk sizes). Performance data, compared against fallback bulking: bulk - fallback bulk - improvement with this patch 1 - 62 cycles(tsc) 15.662 ns - 49 cycles(tsc) 12.407 ns- improved 21.0% 2 - 55 cycles(tsc) 13.935 ns - 30 cycles(tsc) 7.506 ns - improved 45.5% 3 - 53 cycles(tsc) 13.341 ns - 23 cycles(tsc) 5.865 ns - improved 56.6% 4 - 52 cycles(tsc) 13.081 ns - 20 cycles(tsc) 5.048 ns - improved 61.5% 8 - 50 cycles(tsc) 12.627 ns - 18 cycles(tsc) 4.659 ns - improved 64.0% 16 - 49 cycles(tsc) 12.412 ns - 17 cycles(tsc) 4.495 ns - improved 65.3% 30 - 49 cycles(tsc) 12.484 ns - 18 cycles(tsc) 4.533 ns - improved 63.3% 32 - 50 cycles(tsc) 12.627 ns - 18 cycles(tsc) 4.707 ns - improved 64.0% 34 - 96 cycles(tsc) 24.243 ns - 23 cycles(tsc) 5.976 ns - improved 76.0% 48 - 83 cycles(tsc) 20.818 ns - 21 cycles(tsc) 5.329 ns - improved 74.7% 64 - 74 cycles(tsc) 18.700 ns - 20 cycles(tsc) 5.127 ns - improved 73.0% 128 - 90 cycles(tsc) 22.734 ns - 27 cycles(tsc) 6.833 ns - improved 70.0% 158 - 99 cycles(tsc) 24.776 ns - 30 cycles(tsc) 7.583 ns - improved 69.7% 250 - 104 cycles(tsc) 26.089 ns - 37 cycles(tsc) 9.280 ns - improved 64.4% Performance data, compared current in-kernel bulking: bulk - curr in-kernel - improvement with this patch 1 - 46 cycles(tsc) - 49 cycles(tsc) - improved (cycles:-3) -6.5% 2 - 27 cycles(tsc) - 30 cycles(tsc) - improved (cycles:-3) -11.1% 3 - 21 cycles(tsc) - 23 cycles(tsc) - improved (cycles:-2) -9.5% 4 - 18 cycles(tsc) - 20 cycles(tsc) - improved (cycles:-2) -11.1% 8 - 17 cycles(tsc) - 18 cycles(tsc) - improved (cycles:-1) -5.9% 16 - 18 cycles(tsc) - 17 cycles(tsc) - improved (cycles: 1) 5.6% 30 - 18 cycles(tsc) - 18 cycles(tsc) - improved (cycles: 0) 0.0% 32 - 18 cycles(tsc) - 18 cycles(tsc) - improved (cycles: 0) 0.0% 34 - 78 cycles(tsc) - 23 cycles(tsc) - improved (cycles:55) 70.5% 48 - 60 cycles(tsc) - 21 cycles(tsc) - improved (cycles:39) 65.0% 64 - 49 cycles(tsc) - 20 cycles(tsc) - improved (cycles:29) 59.2% 128 - 69 cycles(tsc) - 27 cycles(tsc) - improved (cycles:42) 60.9% 158 - 79 cycles(tsc) - 30 cycles(tsc) - improved (cycles:49) 62.0% 250 - 86 cycles(tsc) - 37 cycles(tsc) - improved (cycles:49) 57.0% Performance with normal SLUB merging is significantly slower for larger bulking. This is believed to (primarily) be an effect of not having to share the per-CPU data-structures, as tuning per-CPU size can achieve similar performance. bulk - slab_nomerge - normal SLUB merge 1 - 49 cycles(tsc) - 49 cycles(tsc) - merge slower with cycles:0 2 - 30 cycles(tsc) - 30 cycles(tsc) - merge slower with cycles:0 3 - 23 cycles(tsc) - 23 cycles(tsc) - merge slower with cycles:0 4 - 20 cycles(tsc) - 20 cycles(tsc) - merge slower with cycles:0 8 - 18 cycles(tsc) - 18 cycles(tsc) - merge slower with cycles:0 16 - 17 cycles(tsc) - 17 cycles(tsc) - merge slower with cycles:0 30 - 18 cycles(tsc) - 23 cycles(tsc) - merge slower with cycles:5 32 - 18 cycles(tsc) - 22 cycles(tsc) - merge slower with cycles:4 34 - 23 cycles(tsc) - 22 cycles(tsc) - merge slower with cycles:-1 48 - 21 cycles(tsc) - 22 cycles(tsc) - merge slower with cycles:1 64 - 20 cycles(tsc) - 48 cycles(tsc) - merge slower with cycles:28 128 - 27 cycles(tsc) - 57 cycles(tsc) - merge slower with cycles:30 158 - 30 cycles(tsc) - 59 cycles(tsc) - merge slower with cycles:29 250 - 37 cycles(tsc) - 56 cycles(tsc) - merge slower with cycles:19 Joint work with Alexander Duyck. [1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/mm/slab_bulk_test01.c [akpm@linux-foundation.org: BUG_ON -> WARN_ON;return] Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexander Duyck Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 113 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 32 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d52f0d0ab712..c17c5202864d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2807,44 +2807,93 @@ void kmem_cache_free(struct kmem_cache *s, void *x) } EXPORT_SYMBOL(kmem_cache_free); +struct detached_freelist { + struct page *page; + void *tail; + void *freelist; + int cnt; +}; + +/* + * This function progressively scans the array with free objects (with + * a limited look ahead) and extract objects belonging to the same + * page. It builds a detached freelist directly within the given + * page/objects. This can happen without any need for + * synchronization, because the objects are owned by running process. + * The freelist is build up as a single linked list in the objects. + * The idea is, that this detached freelist can then be bulk + * transferred to the real freelist(s), but only requiring a single + * synchronization primitive. Look ahead in the array is limited due + * to performance reasons. + */ +static int build_detached_freelist(struct kmem_cache *s, size_t size, + void **p, struct detached_freelist *df) +{ + size_t first_skipped_index = 0; + int lookahead = 3; + void *object; + + /* Always re-init detached_freelist */ + df->page = NULL; + + do { + object = p[--size]; + } while (!object && size); + + if (!object) + return 0; + + /* Start new detached freelist */ + set_freepointer(s, object, NULL); + df->page = virt_to_head_page(object); + df->tail = object; + df->freelist = object; + p[size] = NULL; /* mark object processed */ + df->cnt = 1; + + while (size) { + object = p[--size]; + if (!object) + continue; /* Skip processed objects */ + + /* df->page is always set at this point */ + if (df->page == virt_to_head_page(object)) { + /* Opportunity build freelist */ + set_freepointer(s, object, df->freelist); + df->freelist = object; + df->cnt++; + p[size] = NULL; /* mark object processed */ + + continue; + } + + /* Limit look ahead search */ + if (!--lookahead) + break; + + if (!first_skipped_index) + first_skipped_index = size + 1; + } + + return first_skipped_index; +} + + /* Note that interrupts must be enabled when calling this function. */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) { - struct kmem_cache_cpu *c; - struct page *page; - int i; + if (WARN_ON(!size)) + return; - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); + do { + struct detached_freelist df; - for (i = 0; i < size; i++) { - void *object = p[i]; + size = build_detached_freelist(s, size, p, &df); + if (unlikely(!df.page)) + continue; - BUG_ON(!object); - /* kmem cache debug support */ - s = cache_from_obj(s, object); - if (unlikely(!s)) - goto exit; - slab_free_hook(s, object); - - page = virt_to_head_page(object); - - if (c->page == page) { - /* Fastpath: local CPU free */ - set_freepointer(s, object, c->freelist); - c->freelist = object; - } else { - c->tid = next_tid(c->tid); - local_irq_enable(); - /* Slowpath: overhead locked cmpxchg_double_slab */ - __slab_free(s, page, object, object, 1, _RET_IP_); - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); - } - } -exit: - c->tid = next_tid(c->tid); - local_irq_enable(); + slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); From 03ec0ed57ffc77720b811dbb6d44733b58360d9f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:52 -0800 Subject: [PATCH 262/291] slub: fix kmem cgroup bug in kmem_cache_alloc_bulk The call slab_pre_alloc_hook() interacts with kmemgc and is not allowed to be called several times inside the bulk alloc for loop, due to the call to memcg_kmem_get_cache(). This would result in hitting the VM_BUG_ON in __memcg_kmem_get_cache. As suggested by Vladimir Davydov, change slab_post_alloc_hook() to be able to handle an array of objects. A subtle detail is, loop iterator "i" in slab_post_alloc_hook() must have same type (size_t) as size argument. This helps the compiler to easier realize that it can remove the loop, when all debug statements inside loop evaluates to nothing. Note, this is only an issue because the kernel is compiled with GCC option: -fno-strict-overflow In slab_alloc_node() the compiler inlines and optimizes the invocation of slab_post_alloc_hook(s, flags, 1, &object) by removing the loop and access object directly. Signed-off-by: Jesper Dangaard Brouer Reported-by: Vladimir Davydov Suggested-by: Vladimir Davydov Reviewed-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index c17c5202864d..ce1797623391 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1292,14 +1292,21 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, return memcg_kmem_get_cache(s, flags); } -static inline void slab_post_alloc_hook(struct kmem_cache *s, - gfp_t flags, void *object) +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) { + size_t i; + flags &= gfp_allowed_mask; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); + for (i = 0; i < size; i++) { + void *object = p[i]; + + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); + kmemleak_alloc_recursive(object, s->object_size, 1, + s->flags, flags); + kasan_slab_alloc(s, object); + } memcg_kmem_put_cache(s); - kasan_slab_alloc(s, object); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -2475,7 +2482,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, static __always_inline void *slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr) { - void **object; + void *object; struct kmem_cache_cpu *c; struct page *page; unsigned long tid; @@ -2554,7 +2561,7 @@ redo: if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->object_size); - slab_post_alloc_hook(s, gfpflags, object); + slab_post_alloc_hook(s, gfpflags, 1, &object); return object; } @@ -2904,6 +2911,10 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, struct kmem_cache_cpu *c; int i; + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, flags); + if (unlikely(!s)) + return false; /* * Drain objects in the per cpu slab, while disabling local * IRQs, which protects against PREEMPT and interrupts @@ -2928,17 +2939,8 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, c = this_cpu_ptr(s->cpu_slab); continue; /* goto for-loop */ } - - /* kmem_cache debug support */ - s = slab_pre_alloc_hook(s, flags); - if (unlikely(!s)) - goto error; - c->freelist = get_freepointer(s, object); p[i] = object; - - /* kmem_cache debug support */ - slab_post_alloc_hook(s, flags, object); } c->tid = next_tid(c->tid); local_irq_enable(); @@ -2951,11 +2953,13 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, memset(p[j], 0, s->object_size); } + /* memcg and kmem_cache debug support */ + slab_post_alloc_hook(s, flags, size, p); return true; - error: - __kmem_cache_free_bulk(s, i, p); local_irq_enable(); + slab_post_alloc_hook(s, flags, i, p); + __kmem_cache_free_bulk(s, i, p); return false; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); From 033745189b1bae3fc931beeaf48604ee7c259309 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:55 -0800 Subject: [PATCH 263/291] slub: add missing kmem cgroup support to kmem_cache_free_bulk Initial implementation missed support for kmem cgroup support in kmem_cache_free_bulk() call, add this. If CONFIG_MEMCG_KMEM is not enabled, the compiler should be smart enough to not add any asm code. Incoming bulk free objects can belong to different kmem cgroups, and object free call can happen at a later point outside memcg context. Thus, we need to keep the orig kmem_cache, to correctly verify if a memcg object match against its "root_cache" (s->memcg_params.root_cache). Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index ce1797623391..34847044dfe5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2887,13 +2887,17 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, /* Note that interrupts must be enabled when calling this function. */ -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) { if (WARN_ON(!size)) return; do { struct detached_freelist df; + struct kmem_cache *s; + + /* Support for memcg */ + s = cache_from_obj(orig_s, p[size - 1]); size = build_detached_freelist(s, size, p, &df); if (unlikely(!df.page)) From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:58 -0800 Subject: [PATCH 264/291] slab/slub: adjust kmem_cache_alloc_bulk API Adjust kmem_cache_alloc_bulk API before we have any real users. Adjust API to return type 'int' instead of previously type 'bool'. This is done to allow future extension of the bulk alloc API. A future extension could be to allow SLUB to stop at a page boundary, when specified by a flag, and then return the number of objects. The advantage of this approach, would make it easier to make bulk alloc run without local IRQs disabled. With an approach of cmpxchg "stealing" the entire c->freelist or page->freelist. To avoid overshooting we would stop processing at a slab-page boundary. Else we always end up returning some objects at the cost of another cmpxchg. To keep compatible with future users of this API linking against an older kernel when using the new flag, we need to return the number of allocated objects with this API change. Signed-off-by: Jesper Dangaard Brouer Cc: Vladimir Davydov Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- mm/slab.c | 2 +- mm/slab.h | 2 +- mm/slab_common.c | 6 +++--- mm/slob.c | 2 +- mm/slub.c | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92..2037a861e367 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; diff --git a/mm/slab.c b/mm/slab.c index e0819fa96559..4765c97ce690 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3419,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slab.h b/mm/slab.h index 27492eb678f7..7b6087197997 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, * may be allocated or freed using these operations. */ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_MEMCG_KMEM /* diff --git a/mm/slab_common.c b/mm/slab_common.c index d88e97c10a2e..3c6a86b4ec25 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) kmem_cache_free(s, p[i]); } -bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void **p) { size_t i; @@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void *x = p[i] = kmem_cache_alloc(s, flags); if (!x) { __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } } - return true; + return i; } #ifdef CONFIG_MEMCG_KMEM diff --git a/mm/slob.c b/mm/slob.c index 0d7e5df74d1f..17e8f8cc7c53 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slub.c b/mm/slub.c index 34847044dfe5..46997517406e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2909,8 +2909,8 @@ void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) EXPORT_SYMBOL(kmem_cache_free_bulk); /* Note that interrupts must be enabled when calling this function. */ -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { struct kmem_cache_cpu *c; int i; @@ -2959,12 +2959,12 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, /* memcg and kmem_cache debug support */ slab_post_alloc_hook(s, flags, size, p); - return true; + return i; error: local_irq_enable(); slab_post_alloc_hook(s, flags, i, p); __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); From 1ec218373b8ebda821aec00bb156a9c94fad9cd4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Nov 2015 16:45:59 -0800 Subject: [PATCH 265/291] Linux 4.4-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3a0234f50f36..2ffdf9d6f339 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Blurry Fish Butt # *DOCUMENTATION* From 0fcb04d59351f790efb8da18edefd6ab4d9bbf3b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 23 Nov 2015 13:44:38 -0500 Subject: [PATCH 266/291] dm thin: fix regression in advertised discard limits When establishing a thin device's discard limits we cannot rely on the underlying thin-pool device's discard capabilities (which are inherited from the thin-pool's underlying data device) given that DM thin devices must provide discard support even when the thin-pool's underlying data device doesn't support discards. Users were exposed to this thin device discard limits regression if their thin-pool's underlying data device does _not_ support discards. This regression caused all upper-layers that called the blkdev_issue_discard() interface to not be able to issue discards to thin devices (because discard_granularity was 0). This regression wasn't caught earlier because the device-mapper-test-suite's extensive 'thin-provisioning' discard tests are only ever performed against thin-pool's with data devices that support discards. Fix is to have thin_io_hints() test the pool's 'discard_enabled' feature rather than inferring whether or not a thin device's discard support should be enabled by looking at the thin-pool's discard_granularity. Fixes: 216076705 ("dm thin: disable discard support for thin devices if pool's is disabled") Reported-by: Mike Gerber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.1+ --- drivers/md/dm-thin.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 9f0b94fad361..63903a5a5d9e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4250,10 +4250,9 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; struct pool *pool = tc->pool; - struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md); - if (!pool_limits->discard_granularity) - return; /* pool's discard support is disabled */ + if (!pool->pf.discard_enabled) + return; limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */ From 0ebf7f10d67a70e120f365018f1c5fce9ddc567d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 23 Nov 2015 21:11:08 -0500 Subject: [PATCH 267/291] fix sysvfs symlinks The thing got broken back in 2002 - sysvfs does *not* have inline symlinks; even short ones have bodies stored in the first block of file. sysv_symlink() handles that correctly; unfortunately, attempting to look an existing symlink up will end up confusing them for inline symlinks, and interpret the block number containing the body as the body itself. Nobody has noticed until now, which says something about the level of testing sysvfs gets ;-/ Cc: stable@vger.kernel.org # all of them, not that anyone cared Signed-off-by: Al Viro --- fs/sysv/inode.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 590ad9206e3f..02fa1dcc5969 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -162,15 +162,8 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) inode->i_fop = &sysv_dir_operations; inode->i_mapping->a_ops = &sysv_aops; } else if (S_ISLNK(inode->i_mode)) { - if (inode->i_blocks) { - inode->i_op = &sysv_symlink_inode_operations; - inode->i_mapping->a_ops = &sysv_aops; - } else { - inode->i_op = &simple_symlink_inode_operations; - inode->i_link = (char *)SYSV_I(inode)->i_data; - nd_terminate_link(inode->i_link, inode->i_size, - sizeof(SYSV_I(inode)->i_data) - 1); - } + inode->i_op = &sysv_symlink_inode_operations; + inode->i_mapping->a_ops = &sysv_aops; } else init_special_inode(inode, inode->i_mode, rdev); } From c725bfce7968009756ed2836a8cd7ba4dc163011 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 23 Nov 2015 13:09:50 +0100 Subject: [PATCH 268/291] vfs: Make sendfile(2) killable even better Commit 296291cdd162 (mm: make sendfile(2) killable) fixed an issue where sendfile(2) was doing a lot of tiny writes into a filesystem and thus was unkillable for a long time. However sendfile(2) can be (mis)used to issue lots of writes into arbitrary file descriptor such as evenfd or similar special file descriptors which never hit the standard filesystem write path and thus are still unkillable. E.g. the following example from Dmitry burns CPU for ~16s on my test system without possibility to be killed: int r1 = eventfd(0, 0); int r2 = memfd_create("", 0); unsigned long n = 1<<30; fallocate(r2, 0, 0, n); sendfile(r1, r2, 0, n); There are actually quite a few tests for pending signals in sendfile code however we data to write is always available none of them seems to trigger. So fix the problem by adding a test for pending signal into splice_from_pipe_next() also before the loop waiting for pipe buffers to be available. This should fix all the lockup issues with sendfile of the do-ton-of-tiny-writes nature. CC: stable@vger.kernel.org Reported-by: Dmitry Vyukov Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/splice.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/splice.c b/fs/splice.c index 801c21cd77fe..22adbbe51e52 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -809,6 +809,13 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des */ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { + /* + * Check for signal early to make process killable when there are + * always buffers available + */ + if (signal_pending(current)) + return -ERESTARTSYS; + while (!pipe->nrbufs) { if (!pipe->writers) return 0; From c2489e07c0a71a56fb2c84bc0ee66cddfca7d068 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 23 Nov 2015 13:09:51 +0100 Subject: [PATCH 269/291] vfs: Avoid softlockups with sendfile(2) The following test program from Dmitry can cause softlockups or RCU stalls as it copies 1GB from tmpfs into eventfd and we don't have any scheduling point at that path in sendfile(2) implementation: int r1 = eventfd(0, 0); int r2 = memfd_create("", 0); unsigned long n = 1<<30; fallocate(r2, 0, 0, n); sendfile(r1, r2, 0, n); Add cond_resched() into __splice_from_pipe() to fix the problem. CC: Dmitry Vyukov CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/splice.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/splice.c b/fs/splice.c index 22adbbe51e52..4cf700d50b40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -891,6 +891,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_from_pipe_begin(sd); do { + cond_resched(); ret = splice_from_pipe_next(pipe, sd); if (ret > 0) ret = splice_from_pipe_feed(pipe, sd, actor); From 578270bfbd2803dc7b0b03fbc2ac119efbc73195 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 24 Nov 2015 10:35:29 +0800 Subject: [PATCH 270/291] block: fix segment split Inside blk_bio_segment_split(), previous bvec pointer(bvprvp) always points to the iterator local variable, which is obviously wrong, so fix it by pointing to the local variable of 'bvprv'. Fixes: 5014c311baa2b(block: fix bogus compiler warnings in blk-merge.c) Cc: stable@kernel.org #4.3 Reported-by: Michael Ellerman Reported-by: Mark Salter Tested-by: Laurent Dufour Tested-by: Mark Salter Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index de5716d8e525..f2efe8ae75bb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -98,7 +98,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, seg_size += bv.bv_len; bvprv = bv; - bvprvp = &bv; + bvprvp = &bvprv; sectors += bv.bv_len >> 9; continue; } @@ -108,7 +108,7 @@ new_segment: nsegs++; bvprv = bv; - bvprvp = &bv; + bvprvp = &bvprv; seg_size = bv.bv_len; sectors += bv.bv_len >> 9; } From 02e707424c2eadbcda68cd38876c9f4434ca8e1a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 24 Nov 2015 10:35:30 +0800 Subject: [PATCH 271/291] blk-merge: fix blk_bio_segment_split Commit bdced438acd83a(block: setup bi_phys_segments after splitting) introduces function of computing bio->bi_phys_segments during bio splitting. Unfortunately both bio->bi_seg_front_size and bio->bi_seg_back_size arn't computed, so too many physical segments may be obtained for one request since both the two are used to check if one segment across two bios can be possible. This patch fixes the issue by computing the two variables in blk_bio_segment_split(). Fixes: bdced438acd83a(block: setup bi_phys_segments after splitting) Reported-by: Michael Ellerman Reported-by: Mark Salter Tested-by: Laurent Dufour Tested-by: Mark Salter Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index f2efe8ae75bb..50793cdc5331 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -76,6 +76,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; unsigned seg_size = 0, nsegs = 0, sectors = 0; + unsigned front_seg_size = bio->bi_seg_front_size; + bool do_split = true; + struct bio *new = NULL; bio_for_each_segment(bv, bio, iter) { if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q)) @@ -111,13 +114,26 @@ new_segment: bvprvp = &bvprv; seg_size = bv.bv_len; sectors += bv.bv_len >> 9; + + if (nsegs == 1 && seg_size > front_seg_size) + front_seg_size = seg_size; } - *segs = nsegs; - return NULL; + do_split = false; split: *segs = nsegs; - return bio_split(bio, sectors, GFP_NOIO, bs); + + if (do_split) { + new = bio_split(bio, sectors, GFP_NOIO, bs); + if (new) + bio = new; + } + + bio->bi_seg_front_size = front_seg_size; + if (seg_size > bio->bi_seg_back_size) + bio->bi_seg_back_size = seg_size; + + return do_split ? new : NULL; } void blk_queue_split(struct request_queue *q, struct bio **bio, From 12e57f59ca3344a588531f68eeede45666e8a6e0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 24 Nov 2015 10:35:31 +0800 Subject: [PATCH 272/291] blk-merge: warn if figured out segment number is bigger than nr_phys_segments We had seen lots of reports of this kind issue, so add one warnning in blk-merge, then it can be triggered easily and avoid to depend on warning/bug from drivers. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index 50793cdc5331..41a55ba0d78e 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -428,6 +428,12 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, if (sg) sg_mark_end(sg); + /* + * Something must have been wrong if the figured number of + * segment is bigger than number of req's physical segments + */ + WARN_ON(nsegs > rq->nr_phys_segments); + return nsegs; } EXPORT_SYMBOL(blk_rq_map_sg); From e6fab54423450d699a09ec2b899473a541f61971 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 10 Nov 2015 15:11:20 +0100 Subject: [PATCH 273/291] ARM/arm64: KVM: test properly for a PTE's uncachedness The open coded tests for checking whether a PTE maps a page as uncached use a flawed '(pte_val(xxx) & CONST) != CONST' pattern, which is not guaranteed to work since the type of a mapping is not a set of mutually exclusive bits For HYP mappings, the type is an index into the MAIR table (i.e, the index itself does not contain any information whatsoever about the type of the mapping), and for stage-2 mappings it is a bit field where normal memory and device types are defined as follows: #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 I.e., masking *and* comparing with the latter matches on the former, and we have been getting lucky merely because the S2 device mappings also have the PTE_UXN bit set, or we would misidentify memory mappings as device mappings. Since the unmap_range() code path (which contains one instance of the flawed test) is used both for HYP mappings and stage-2 mappings, and considering the difference between the two, it is non-trivial to fix this by rewriting the tests in place, as it would involve passing down the type of mapping through all the functions. However, since HYP mappings and stage-2 mappings both deal with host physical addresses, we can simply check whether the mapping is backed by memory that is managed by the host kernel, and only perform the D-cache maintenance if this is the case. Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Tested-by: Pavel Fedin Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 6984342da13d..7dace909d5cf 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -98,6 +98,11 @@ static void kvm_flush_dcache_pud(pud_t pud) __kvm_flush_dcache_pud(pud); } +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + /** * stage2_dissolve_pmd() - clear and flush huge PMD entry * @kvm: pointer to kvm structure. @@ -213,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, kvm_tlb_flush_vmid_ipa(kvm, addr); /* No need to invalidate the cache for device mappings */ - if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!kvm_is_device_pfn(__phys_to_pfn(addr))) kvm_flush_dcache_pte(old_pte); put_page(virt_to_page(pte)); @@ -305,8 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte) && - (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + if (!pte_none(*pte) && !kvm_is_device_pfn(__phys_to_pfn(addr))) kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -1037,11 +1041,6 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) return kvm_vcpu_dabt_iswrite(vcpu); } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_valid(pfn); -} - /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry From c0f0963464c24e034b858441205455bf2a5d93ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 10:28:17 +0000 Subject: [PATCH 274/291] arm64: KVM: Fix AArch32 to AArch64 register mapping When running a 32bit guest under a 64bit hypervisor, the ARMv8 architecture defines a mapping of the 32bit registers in the 64bit space. This includes banked registers that are being demultiplexed over the 64bit ones. On exceptions caused by an operation involving a 32bit register, the HW exposes the register number in the ESR_EL2 register. It was so far understood that SW had to distinguish between AArch32 and AArch64 accesses (based on the current AArch32 mode and register number). It turns out that I misinterpreted the ARM ARM, and the clue is in D1.20.1: "For some exceptions, the exception syndrome given in the ESR_ELx identifies one or more register numbers from the issued instruction that generated the exception. Where the exception is taken from an Exception level using AArch32 these register numbers give the AArch64 view of the register." Which means that the HW is already giving us the translated version, and that we shouldn't try to interpret it at all (for example, doing an MMIO operation from the IRQ mode using the LR register leads to very unexpected behaviours). The fix is thus not to perform a call to vcpu_reg32() at all from vcpu_reg(), and use whatever register number is supplied directly. The only case we need to find out about the mapping is when we actively generate a register access, which only occurs when injecting a fault in a guest. Cc: stable@vger.kernel.org Reviewed-by: Robin Murphy Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_emulate.h | 8 +++++--- arch/arm64/kvm/inject_fault.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 17e92f05b1fe..3ca894ecf699 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -99,11 +99,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } +/* + * vcpu_reg should always be passed a register number coming from a + * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32 + * with banked registers. + */ static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_reg32(vcpu, reg_num); - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 85c57158dcd9..648112e90ed5 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) /* Note: These now point to the banked copies */ *vcpu_spsr(vcpu) = new_spsr_value; - *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ if (sctlr & (1 << 13)) From 498cd5c32be6e32bc0f8efcad48ab094bb2bfdf3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 16 Nov 2015 10:28:18 +0000 Subject: [PATCH 275/291] arm64: KVM: Add workaround for Cortex-A57 erratum 834220 Cortex-A57 parts up to r1p2 can misreport Stage 2 translation faults when a Stage 1 permission fault or device alignment fault should have been reported. This patch implements the workaround (which is to validate that the Stage-1 translation actually succeeds) by using code patching. Cc: stable@vger.kernel.org Reviewed-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/Kconfig | 21 +++++++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/kvm/hyp.S | 6 ++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9ac16a482ff1..e55848c1edf4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -316,6 +316,27 @@ config ARM64_ERRATUM_832075 If unsure, say Y. +config ARM64_ERRATUM_834220 + bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault" + depends on KVM + default y + help + This option adds an alternative code sequence to work around ARM + erratum 834220 on Cortex-A57 parts up to r1p2. + + Affected Cortex-A57 parts might report a Stage 2 translation + fault as the result of a Stage 1 fault for load crossing a + page boundary when there is a permission or device memory + alignment fault at Stage 1 and a translation fault at Stage 2. + + The workaround is to verify that the Stage 1 translation + doesn't generate a fault before handling the Stage 2 fault. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 11d5bb0fdd54..52722ee73dba 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -29,8 +29,9 @@ #define ARM64_HAS_PAN 4 #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 +#define ARM64_WORKAROUND_834220 7 -#define ARM64_NCAPS 7 +#define ARM64_NCAPS 8 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 24926f2504f7..feb6b4efa641 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -75,6 +75,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { (1 << MIDR_VARIANT_SHIFT) | 2), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_834220 + { + /* Cortex-A57 r0p0 - r1p2 */ + .desc = "ARM erratum 834220", + .capability = ARM64_WORKAROUND_834220, + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_845719 { /* Cortex-A53 r0p[01234] */ diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 1599701ef044..ff2e038f0007 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -1015,9 +1015,15 @@ el1_trap: b.ne 1f // Not an abort we care about /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 and x2, x1, #ESR_ELx_FSC_TYPE cmp x2, #FSC_PERM b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif /* * Check for Stage-1 page table walk, which is guaranteed From 7e16aa81f9f6a7cfe2287b788a7d62abc2880185 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Nov 2015 10:31:07 +0100 Subject: [PATCH 276/291] KVM: arm/arm64: Fix preemptible timer active state crazyness We were setting the physical active state on the GIC distributor in a preemptible section, which could cause us to set the active state on different physical CPU from the one we were actually going to run on, hacoc ensues. Since we are no longer descheduling/scheduling soft timers in the flush/sync timer functions, simply moving the timer flush into a non-preemptible section. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index eab83b2435b8..e06fd299de08 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -563,18 +563,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->arch.power_off || vcpu->arch.pause) vcpu_sleep(vcpu); - /* - * Disarming the background timer must be done in a - * preemptible context, as this call may sleep. - */ - kvm_timer_flush_hwstate(vcpu); - /* * Preparing the interrupts to be injected also * involves poking the GIC, which must be done in a * non-preemptible context. */ preempt_disable(); + kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); local_irq_disable(); From 0e3dfda91d9fe8e2c4d0b5d21434b173a241eeaf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Nov 2015 16:23:05 +0100 Subject: [PATCH 277/291] KVM: arm/arm64: arch_timer: Preserve physical dist. active state on LR.active We were incorrectly removing the active state from the physical distributor on the timer interrupt when the timer output level was deasserted. We shouldn't be doing this without considering the virtual interrupt's active state, because the architecture requires that when an LR has the HW bit set and the pending or active bits set, then the physical interrupt must also have the corresponding bits set. This addresses an issue where we have been observing an inconsistency between the LR state and the physical distributor state where the LR state was active and the physical distributor was not active, which shouldn't happen. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 2 +- virt/kvm/arm/arch_timer.c | 28 +++++++++++++++++----------- virt/kvm/arm/vgic.c | 34 ++++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9c747cb14ad8..d2f41477f8ae 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -342,10 +342,10 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, struct irq_phys_map *map, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 21a0ab2d8919..69bca185c471 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -221,17 +221,23 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); /* - * If we enter the guest with the virtual input level to the VGIC - * asserted, then we have already told the VGIC what we need to, and - * we don't need to exit from the guest until the guest deactivates - * the already injected interrupt, so therefore we should set the - * hardware active state to prevent unnecessary exits from the guest. - * - * Conversely, if the virtual input level is deasserted, then always - * clear the hardware active state to ensure that hardware interrupts - * from the timer triggers a guest exit. - */ - if (timer->irq.level) + * If we enter the guest with the virtual input level to the VGIC + * asserted, then we have already told the VGIC what we need to, and + * we don't need to exit from the guest until the guest deactivates + * the already injected interrupt, so therefore we should set the + * hardware active state to prevent unnecessary exits from the guest. + * + * Also, if we enter the guest with the virtual timer interrupt active, + * then it must be active on the physical distributor, because we set + * the HW bit and the guest must be able to deactivate the virtual and + * physical interrupt at the same time. + * + * Conversely, if the virtual input level is deasserted and the virtual + * interrupt is not active, then always clear the hardware active state + * to ensure that hardware interrupts from the timer triggers a guest + * exit. + */ + if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) phys_active = true; else phys_active = false; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 533538385d5d..97e2c088e1e9 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1096,6 +1096,27 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) vgic_set_lr(vcpu, lr_nr, vlr); } +static bool dist_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map) +{ + int i; + + for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) { + struct vgic_lr vlr = vgic_get_lr(vcpu, i); + + if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE) + return true; + } + + return dist_active_irq(vcpu); +} + /* * An interrupt may have been disabled after being made pending on the * CPU interface (the classic case is a timer running while we're @@ -1248,7 +1269,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * may have been serviced from another vcpu. In all cases, * move along. */ - if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu)) + if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu)) goto epilog; /* SGIs */ @@ -1479,17 +1500,6 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } -int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); -} - - void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; From 9f958c11b780af74fc311b96d6124740aa21cc68 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Nov 2015 16:34:31 +0100 Subject: [PATCH 278/291] KVM: arm/arm64: vgic: Trust the LR state for HW IRQs We were probing the physial distributor state for the active state of a HW virtual IRQ, because we had seen evidence that the LR state was not cleared when the guest deactivated a virtual interrupted. However, this issue turned out to be a software bug in the GIC, which was solved by: 84aab5e68c2a5e1e18d81ae8308c3ce25d501b29 (KVM: arm/arm64: arch_timer: Preserve physical dist. active state on LR.active, 2015-11-24) Therefore, get rid of the complexities and just look at the LR. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 97e2c088e1e9..65461f821a75 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1417,25 +1417,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct irq_phys_map *map; - bool phys_active; bool level_pending; - int ret; if (!(vlr.state & LR_HW)) return false; - map = vgic_irq_map_search(vcpu, vlr.irq); - BUG_ON(!map); - - ret = irq_get_irqchip_state(map->irq, - IRQCHIP_STATE_ACTIVE, - &phys_active); - - WARN_ON(ret); - - if (phys_active) - return 0; + if (vlr.state & LR_STATE_ACTIVE) + return false; spin_lock(&dist->lock); level_pending = process_queued_irq(vcpu, lr, vlr); From 1d7a4e313abbc7200982e5a68121483a3aa32295 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Nov 2015 13:55:51 +0000 Subject: [PATCH 279/291] arm64: kvm: avoid %p in __kvm_hyp_panic Currently __kvm_hyp_panic uses %p for values which are not pointers, such as the ESR value. This can confusingly lead to "(null)" being printed for the value. Use %x instead, and only use %p for host pointers. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index ff2e038f0007..ce70817a9223 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -914,7 +914,7 @@ __kvm_hyp_panic: ENDPROC(__kvm_hyp_panic) __hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" + .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" .align 2 From fbb4574ce9a37e15a9872860bf202f2be5bdf6c4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Nov 2015 13:58:29 +0000 Subject: [PATCH 280/291] arm64: kvm: report original PAR_EL1 upon panic If we call __kvm_hyp_panic while a guest context is active, we call __restore_sysregs before acquiring the system register values for the panic, in the process throwing away the PAR_EL1 value at the point of the panic. This patch modifies __kvm_hyp_panic to stash the PAR_EL1 value prior to restoring host register values, enabling us to report the original values at the point of the panic. Acked-by: Marc Zyngier Signed-off-by: Mark Rutland Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index ce70817a9223..86c289832272 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -864,6 +864,10 @@ ENTRY(__kvm_flush_vm_context) ENDPROC(__kvm_flush_vm_context) __kvm_hyp_panic: + // Stash PAR_EL1 before corrupting it in __restore_sysregs + mrs x0, par_el1 + push x0, xzr + // Guess the context by looking at VTTBR: // If zero, then we're already a host. // Otherwise restore a minimal host context before panicing. @@ -898,7 +902,7 @@ __kvm_hyp_panic: mrs x3, esr_el2 mrs x4, far_el2 mrs x5, hpfar_el2 - mrs x6, par_el1 + pop x6, xzr // active context PAR_EL1 mrs x7, tpidr_el2 mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ From 81b1a832d79749058863cffe2c0ed4ef40f6e6ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Nov 2015 11:39:54 -0800 Subject: [PATCH 281/291] pidns: fix NULL dereference in __task_pid_nr_ns() I got a crash during a "perf top" session that was caused by a race in __task_pid_nr_ns() : pid_nr_ns() was inlined, but apparently compiler chose to read task->pids[type].pid twice, and the pid->level dereference crashed because we got a NULL pointer at the second read : if (pid && ns->level <= pid->level) { // CRASH Just use RCU API properly to solve this race, and not worry about "perf top" crashing hosts :( get_task_pid() can benefit from same fix. Signed-off-by: Eric Dumazet Signed-off-by: Linus Torvalds --- kernel/pid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/pid.c b/kernel/pid.c index ca368793808e..78b3d9f80d44 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) rcu_read_lock(); if (type != PIDTYPE_PID) task = task->group_leader; - pid = get_pid(task->pids[type].pid); + pid = get_pid(rcu_dereference(task->pids[type].pid)); rcu_read_unlock(); return pid; } @@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (likely(pid_alive(task))) { if (type != PIDTYPE_PID) task = task->group_leader; - nr = pid_nr_ns(task->pids[type].pid, ns); + nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); } rcu_read_unlock(); From c5c9f25b98a568451d665afe4aeefe17bf9f2995 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Tue, 24 Nov 2015 09:55:05 -0700 Subject: [PATCH 282/291] NVMe: default to 4k device page size We received a bug report recently when DDW (64-bit direct DMA on Power) is not enabled for NVMe devices. In that case, we fall back to 32-bit DMA via the IOMMU, which is always done via 4K TCEs (Translation Control Entries). The NVMe device driver, though, assumes that the DMA alignment for the PRP entries will match the device's page size, and that the DMA aligment matches the kernel's page aligment. On Power, the the IOMMU page size, as mentioned above, can be 4K, while the device can have a page size of 8K, while the kernel has a page size of 64K. This eventually trips the BUG_ON in nvme_setup_prps(), as we have a 'dma_len' that is a multiple of 4K but not 8K (e.g., 0xF000). In this particular case of page sizes, we clearly want to use the IOMMU's page size in the driver. And generally, the NVMe driver in this function should be using the IOMMU's page size for the default device page size, rather than the kernel's page size. There is not currently an API to obtain the IOMMU's page size across all architectures and in the interest of a stop-gap fix to this functional issue, default the NVMe device page size to 4K, with the intent of adding such an API and implementation across all architectures in the next merge window. With the functionally equivalent v3 of this patch, our hardware test exerciser survives when using 32-bit DMA; without the patch, the kernel will BUG within a few minutes. Signed-off-by: Nishanth Aravamudan Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 930042fa2d69..d9d6229e9f3f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1728,9 +1728,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u32 aqa; u64 cap = lo_hi_readq(&dev->bar->cap); struct nvme_queue *nvmeq; - unsigned page_shift = PAGE_SHIFT; + /* + * default to a 4K page size, with the intention to update this + * path in the future to accomodate architectures with differing + * kernel and IO page sizes. + */ + unsigned page_shift = 12; unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12; - unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; if (page_shift < dev_page_min) { dev_err(dev->dev, @@ -1739,13 +1743,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) 1 << page_shift); return -ENODEV; } - if (page_shift > dev_page_max) { - dev_info(dev->dev, - "Device maximum page size (%u) smaller than " - "host (%u); enabling work-around\n", - 1 << dev_page_max, 1 << page_shift); - page_shift = dev_page_max; - } dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ? NVME_CAP_NSSRC(cap) : 0; From bf508e910b02a6107a5aa054e03c6fc8a65dae1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Oct 2015 07:58:31 +0200 Subject: [PATCH 283/291] nvme: add missing unmaps in nvme_queue_rq When we fail various metadata related operations in nvme_queue_rq we need to unmap the data SGL. Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d9d6229e9f3f..f3b53af789ef 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -896,19 +896,28 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto retry_cmd; } if (blk_integrity_rq(req)) { - if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) + if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } sg_init_table(iod->meta_sg, 1); if (blk_rq_map_integrity_sg( - req->q, req->bio, iod->meta_sg) != 1) + req->q, req->bio, iod->meta_sg) != 1) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } if (rq_data_dir(req)) nvme_dif_remap(req, nvme_dif_prep); - if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) + if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, + dma_dir); goto error_cmd; + } } } From 55ce0da1da287822e5ffb5fcd6e357180d5ba4cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Oct 2015 20:47:04 +0800 Subject: [PATCH 284/291] block: fix blk_abort_request for blk-mq drivers We only added the request to the request list for the !blk-mq case, so we should only delete it in that case as well. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-timeout.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 246dfb16c3d9..aa40aa93381b 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -158,11 +158,13 @@ void blk_abort_request(struct request *req) { if (blk_mark_rq_complete(req)) return; - blk_delete_timer(req); - if (req->q->mq_ops) + + if (req->q->mq_ops) { blk_mq_rq_timed_out(req, false); - else + } else { + blk_delete_timer(req); blk_rq_timed_out(req); + } } EXPORT_SYMBOL_GPL(blk_abort_request); From b2467e744f89fcb2e723143c2b78bcbaf391828a Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Wed, 25 Nov 2015 17:21:39 +0800 Subject: [PATCH 285/291] KVM: nVMX: remove incorrect vpid check in nested invvpid emulation This patch removes the vpid check when emulating nested invvpid instruction of type all-contexts invalidation. The existing code is incorrect because: (1) According to Intel SDM Vol 3, Section "INVVPID - Invalidate Translations Based on VPID", invvpid instruction does not check vpid in the invvpid descriptor when its type is all-contexts invalidation. (2) According to the same document, invvpid of type all-contexts invalidation does not require there is an active VMCS, so/and get_vmcs12() in the existing code may result in a NULL-pointer dereference. In practice, it can crash both KVM itself and L1 hypervisors that use invvpid (e.g. Xen). Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 87acc5221740..af823a388c19 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7394,11 +7394,6 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) switch (type) { case VMX_VPID_EXTENT_ALL_CONTEXT: - if (get_vmcs12(vcpu)->virtual_processor_id == 0) { - nested_vmx_failValid(vcpu, - VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return 1; - } __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); nested_vmx_succeed(vcpu); break; From dcd8376c369fa8fde8269e721b14f50475dd397b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Nov 2015 10:12:54 -0700 Subject: [PATCH 286/291] Revert "blk-flush: Queue through IO scheduler when flush not required" This reverts commit 1b2ff19e6a957b1ef0f365ad331b608af80e932e. Jan writes: -- Thanks for report! After some investigation I found out we allocate elevator specific data in __get_request() only for non-flush requests. And this is actually required since the flush machinery uses the space in struct request for something else. Doh. So my patch is just wrong and not easy to fix since at the time __get_request() is called we are not sure whether the flush machinery will be used in the end. Jens, please revert 1b2ff19e6a957b1ef0f365ad331b608af80e932e. Thanks! I'm somewhat surprised that you can reliably hit the race where flushing gets disabled for the device just while the request is in flight. But I guess during boot it makes some sense. -- So let's just revert it, we can fix the queue run manually after the fact. This race is rare enough that it didn't trigger in testing, it requires the specific disable-while-in-flight scenario to trigger. --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index c81d56ec308f..9c423e53324a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -422,7 +422,7 @@ void blk_insert_flush(struct request *rq) if (q->mq_ops) { blk_mq_insert_request(rq, false, false, true); } else - q->elevator->type->ops.elevator_add_req_fn(q, rq); + list_add_tail(&rq->queuelist, &q->queue_head); return; } From 20ef10c1b3068f105004e247d8e7dd8120fa4b9a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 26 Nov 2015 09:42:08 +1030 Subject: [PATCH 287/291] module: Use the same logic for setting and unsetting RO/NX When setting a module's RO and NX permissions, set_section_ro_nx() is used, but when clearing them, unset_module_{init,core}_ro_nx() are used. The unset functions don't have the same checks the set function has for partial page protections. It's probably harmless, but it's still confusingly asymmetrical. Instead, use the same logic to do both. Also add some new set_module_{init,core}_ro_nx() helper functions for more symmetry with the unset functions. Signed-off-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/module.c | 57 ++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 8f051a106676..14b224967e7b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,7 +1886,9 @@ void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, static void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, - unsigned long total_size) + unsigned long total_size, + int (*set_ro)(unsigned long start, int num_pages), + int (*set_nx)(unsigned long start, int num_pages)) { /* begin and end PFNs of the current subsection */ unsigned long begin_pfn; @@ -1898,7 +1900,7 @@ static void set_section_ro_nx(void *base, * - Do not protect last partial page. */ if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_memory_ro); + set_page_attributes(base, base + ro_size, set_ro); /* * Set NX permissions for module data: @@ -1909,28 +1911,36 @@ static void set_section_ro_nx(void *base, begin_pfn = PFN_UP((unsigned long)base + text_size); end_pfn = PFN_UP((unsigned long)base + total_size); if (end_pfn > begin_pfn) - set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + set_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); } } +static void set_module_core_ro_nx(struct module *mod) +{ + set_section_ro_nx(mod->module_core, mod->core_text_size, + mod->core_ro_size, mod->core_size, + set_memory_ro, set_memory_nx); +} + static void unset_module_core_ro_nx(struct module *mod) { - set_page_attributes(mod->module_core + mod->core_text_size, - mod->module_core + mod->core_size, - set_memory_x); - set_page_attributes(mod->module_core, - mod->module_core + mod->core_ro_size, - set_memory_rw); + set_section_ro_nx(mod->module_core, mod->core_text_size, + mod->core_ro_size, mod->core_size, + set_memory_rw, set_memory_x); +} + +static void set_module_init_ro_nx(struct module *mod) +{ + set_section_ro_nx(mod->module_init, mod->init_text_size, + mod->init_ro_size, mod->init_size, + set_memory_ro, set_memory_nx); } static void unset_module_init_ro_nx(struct module *mod) { - set_page_attributes(mod->module_init + mod->init_text_size, - mod->module_init + mod->init_size, - set_memory_x); - set_page_attributes(mod->module_init, - mod->module_init + mod->init_ro_size, - set_memory_rw); + set_section_ro_nx(mod->module_init, mod->init_text_size, + mod->init_ro_size, mod->init_size, + set_memory_rw, set_memory_x); } /* Iterate through all modules and set each module's text as RW */ @@ -1979,7 +1989,8 @@ void set_all_modules_text_ro(void) mutex_unlock(&module_mutex); } #else -static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static void set_module_core_ro_nx(struct module *mod) { } +static void set_module_init_ro_nx(struct module *mod) { } static void unset_module_core_ro_nx(struct module *mod) { } static void unset_module_init_ro_nx(struct module *mod) { } #endif @@ -3373,17 +3384,9 @@ static int complete_formation(struct module *mod, struct load_info *info) /* This relies on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); + /* Set RO and NX regions */ + set_module_init_ro_nx(mod); + set_module_core_ro_nx(mod); /* Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ From c65abf358f211c3f88c8ed714dff25775ab49fc1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:43:08 +1030 Subject: [PATCH 288/291] gcov: use within_module() helper. An exact mapping would be within_module_core(), but at this stage (MODULE_STATE_GOING) the init section is empty, and this is clearer. Reviewed-by: Peter Oberparleiter Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/gcov/base.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 7080ae1eb6c1..2f9df37940a0 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -123,11 +123,6 @@ void gcov_enable_events(void) } #ifdef CONFIG_MODULES -static inline int within(void *addr, void *start, unsigned long size) -{ - return ((addr >= start) && (addr < start + size)); -} - /* Update list and generate events when modules are unloaded. */ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, void *data) @@ -142,7 +137,7 @@ static int gcov_module_notifier(struct notifier_block *nb, unsigned long event, /* Remove entries located in module from linked list. */ while ((info = gcov_info_next(info))) { - if (within(info, mod->module_core, mod->core_size)) { + if (within_module((unsigned long)info, mod)) { gcov_info_unlink(prev, info); if (gcov_events_enabled) gcov_event(GCOV_REMOVE, info); From 7523e4dc5057e157212b4741abd6256e03404cf1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:44:08 +1030 Subject: [PATCH 289/291] module: use a structure to encapsulate layout. Makes it easier to handle init vs core cleanly, though the change is fairly invasive across random architectures. It simplifies the rbtree code immediately, however, while keeping the core data together in the same cachline (now iff the rbtree code is enabled). Acked-by: Peter Zijlstra Reviewed-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- arch/alpha/kernel/module.c | 2 +- arch/arc/kernel/unwind.c | 4 +- arch/arm/kernel/module-plts.c | 2 +- arch/avr32/kernel/module.c | 12 +- arch/ia64/kernel/module.c | 14 +-- arch/metag/kernel/module.c | 4 +- arch/mips/kernel/vpe.c | 6 +- arch/parisc/kernel/module.c | 32 ++--- arch/powerpc/kernel/module_32.c | 6 +- arch/s390/kernel/module.c | 22 ++-- arch/x86/kernel/livepatch.c | 6 +- include/linux/module.h | 64 +++++----- kernel/debug/kdb/kdb_main.c | 4 +- kernel/module.c | 199 +++++++++++++++----------------- 14 files changed, 178 insertions(+), 199 deletions(-) diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 2fd00b7077e4..936bc8f89a67 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 93c6ea52b671..e0034a6656ef 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -372,8 +372,8 @@ void *unwind_add_table(struct module *module, const void *table_start, return NULL; init_unwind_table(table, module->name, - module->module_core, module->core_size, - module->module_init, module->init_size, + module->core_layout.base, module->core_layout.size, + module->init_layout.base, module->init_layout.size, table_start, table_size, NULL, 0); diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 097e2e201b9f..0c7efc3446c0 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -32,7 +32,7 @@ struct plt_entries { static bool in_init(const struct module *mod, u32 addr) { - return addr - (u32)mod->module_init < mod->init_size; + return addr - (u32)mod->init_layout.base < mod->init_layout.size; } u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 164efa009e5b..2b4c54c04cb6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -118,9 +118,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, * Increase core size to make room for GOT and set start * offset for GOT. */ - module->core_size = ALIGN(module->core_size, 4); - module->arch.got_offset = module->core_size; - module->core_size += module->arch.got_size; + module->core_layout.size = ALIGN(module->core_layout.size, 4); + module->arch.got_offset = module->core_layout.size; + module->core_layout.size += module->arch.got_size; return 0; @@ -177,7 +177,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, if (!info->got_initialized) { Elf32_Addr *gotent; - gotent = (module->module_core + gotent = (module->core_layout.base + module->arch.got_offset + info->got_offset); *gotent = relocation; @@ -255,8 +255,8 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, */ pr_debug("GOTPC: PC=0x%x, got_offset=0x%lx, core=0x%p\n", relocation, module->arch.got_offset, - module->module_core); - relocation -= ((unsigned long)module->module_core + module->core_layout.base); + relocation -= ((unsigned long)module->core_layout.base + module->arch.got_offset); *location = relocation; break; diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index b15933c31b2f..6ab0ae7d6535 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -486,13 +486,13 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return addr - (uint64_t) mod->init_layout.base < mod->init_layout.size; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return addr - (uint64_t) mod->core_layout.base < mod->core_layout.size; } static inline int @@ -675,7 +675,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + val -= (uint64_t) (in_init(mod, val) ? mod->init_layout.base : mod->core_layout.base); break; case RV_LTV: @@ -810,15 +810,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_layout.size > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_layout.size - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = mod->core_layout.size / 2; + gp = (uint64_t) mod->core_layout.base + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); } diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c index 986331cd0a52..bb8dfba9a763 100644 --- a/arch/metag/kernel/module.c +++ b/arch/metag/kernel/module.c @@ -176,8 +176,8 @@ static uint32_t do_plt_call(void *location, Elf32_Addr val, tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 9067b651c7a2..544ea21bfef9 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -205,11 +205,11 @@ static void layout_sections(struct module *mod, const Elf_Ehdr *hdr, || s->sh_entsize != ~0UL) continue; s->sh_entsize = - get_offset((unsigned long *)&mod->core_size, s); + get_offset((unsigned long *)&mod->core_layout.size, s); } if (m == 0) - mod->core_text_size = mod->core_size; + mod->core_layout.text_size = mod->core_layout.size; } } @@ -641,7 +641,7 @@ static int vpe_elfload(struct vpe *v) layout_sections(&mod, hdr, sechdrs, secstrings); } - v->load_addr = alloc_progmem(mod.core_size); + v->load_addr = alloc_progmem(mod.core_layout.size); if (!v->load_addr) return -ENOMEM; diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 3c63a820fcda..b9d75d9fa9ac 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -42,9 +42,9 @@ * We are not doing SEGREL32 handling correctly. According to the ABI, we * should do a value offset, like this: * if (in_init(me, (void *)val)) - * val -= (uint32_t)me->module_init; + * val -= (uint32_t)me->init_layout.base; * else - * val -= (uint32_t)me->module_core; + * val -= (uint32_t)me->core_layout.base; * However, SEGREL32 is used only for PARISC unwind entries, and we want * those entries to have an absolute address, and not just an offset. * @@ -100,14 +100,14 @@ * or init pieces the location is */ static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return (loc >= me->init_layout.base && + loc <= (me->init_layout.base + me->init_layout.size)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return (loc >= me->core_layout.base && + loc <= (me->core_layout.base + me->core_layout.size)); } static inline int in_local(struct module *me, void *loc) @@ -367,13 +367,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += gots * sizeof(struct got_entry); - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); + me->core_layout.size = ALIGN(me->core_layout.size, 16); + me->arch.fdesc_offset = me->core_layout.size; + me->core_layout.size += fdescs * sizeof(Elf_Fdesc); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -391,7 +391,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->core_layout.base + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -409,7 +409,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) #ifdef CONFIG_64BIT static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->core_layout.base + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -427,7 +427,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value) /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* CONFIG_64BIT */ @@ -839,7 +839,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c94d2e018d84..2c01665eb410 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -188,8 +188,8 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if (location >= mod->core_layout.base + && location < mod->core_layout.base + mod->core_layout.size) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -296,7 +296,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE module->arch.tramp = - do_plt_call(module->module_core, + do_plt_call(module->core_layout.base, (unsigned long)ftrace_caller, sechdrs, module); #endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 0c1a679314dd..7873e171457c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -159,11 +159,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_layout.size = ALIGN(me->core_layout.size, 4); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += me->arch.got_size; + me->arch.plt_offset = me->core_layout.size; + me->core_layout.size += me->arch.plt_size; return 0; } @@ -279,7 +279,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->core_layout.base + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -302,7 +302,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->module_core - loc; + val += (Elf_Addr) me->core_layout.base - loc; rc = apply_rela_bits(loc, val, 1, 32, 1); } break; @@ -315,7 +315,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ ip[1] = 0x100a0004; @@ -334,7 +334,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->core_layout.base + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -356,7 +356,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) @@ -366,7 +366,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) rc = apply_rela_bits(loc, val, 1, 32, 0); diff --git a/arch/x86/kernel/livepatch.c b/arch/x86/kernel/livepatch.c index d1d35ccffed3..bcc06e82a593 100644 --- a/arch/x86/kernel/livepatch.c +++ b/arch/x86/kernel/livepatch.c @@ -41,8 +41,8 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, int ret, numpages, size = 4; bool readonly; unsigned long val; - unsigned long core = (unsigned long)mod->module_core; - unsigned long core_size = mod->core_size; + unsigned long core = (unsigned long)mod->core_layout.base; + unsigned long core_size = mod->core_layout.size; switch (type) { case R_X86_64_NONE: @@ -72,7 +72,7 @@ int klp_write_module_reloc(struct module *mod, unsigned long type, readonly = false; #ifdef CONFIG_DEBUG_SET_MODULE_RONX - if (loc < core + mod->core_ro_size) + if (loc < core + mod->core_layout.ro_size) readonly = true; #endif diff --git a/include/linux/module.h b/include/linux/module.h index 3a19c79918e0..6e68e8cf4d0d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -302,6 +302,28 @@ struct mod_tree_node { struct latch_tree_node node; }; +struct module_layout { + /* The actual code + data. */ + void *base; + /* Total size. */ + unsigned int size; + /* The size of the executable code. */ + unsigned int text_size; + /* Size of RO section of the module (text+rodata) */ + unsigned int ro_size; + +#ifdef CONFIG_MODULES_TREE_LOOKUP + struct mod_tree_node mtn; +#endif +}; + +#ifdef CONFIG_MODULES_TREE_LOOKUP +/* Only touch one cacheline for common rbtree-for-core-layout case. */ +#define __module_layout_align ____cacheline_aligned +#else +#define __module_layout_align +#endif + struct module { enum module_state state; @@ -366,37 +388,9 @@ struct module { /* Startup function. */ int (*init)(void); - /* - * If this is non-NULL, vfree() after init() returns. - * - * Cacheline align here, such that: - * module_init, module_core, init_size, core_size, - * init_text_size, core_text_size and mtn_core::{mod,node[0]} - * are on the same cacheline. - */ - void *module_init ____cacheline_aligned; - - /* Here is the actual code + data, vfree'd on unload. */ - void *module_core; - - /* Here are the sizes of the init and core sections */ - unsigned int init_size, core_size; - - /* The size of the executable code in each section. */ - unsigned int init_text_size, core_text_size; - -#ifdef CONFIG_MODULES_TREE_LOOKUP - /* - * We want mtn_core::{mod,node[0]} to be in the same cacheline as the - * above entries such that a regular lookup will only touch one - * cacheline. - */ - struct mod_tree_node mtn_core; - struct mod_tree_node mtn_init; -#endif - - /* Size of RO sections of the module (text+rodata) */ - unsigned int init_ro_size, core_ro_size; + /* Core layout: rbtree is accessed frequently, so keep together. */ + struct module_layout core_layout __module_layout_align; + struct module_layout init_layout; /* Arch-specific module values */ struct mod_arch_specific arch; @@ -505,15 +499,15 @@ bool is_module_text_address(unsigned long addr); static inline bool within_module_core(unsigned long addr, const struct module *mod) { - return (unsigned long)mod->module_core <= addr && - addr < (unsigned long)mod->module_core + mod->core_size; + return (unsigned long)mod->core_layout.base <= addr && + addr < (unsigned long)mod->core_layout.base + mod->core_layout.size; } static inline bool within_module_init(unsigned long addr, const struct module *mod) { - return (unsigned long)mod->module_init <= addr && - addr < (unsigned long)mod->module_init + mod->init_size; + return (unsigned long)mod->init_layout.base <= addr && + addr < (unsigned long)mod->init_layout.base + mod->init_layout.size; } static inline bool within_module(unsigned long addr, const struct module *mod) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4121345498e0..2a20c0dfdafc 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2021,7 +2021,7 @@ static int kdb_lsmod(int argc, const char **argv) continue; kdb_printf("%-20s%8u 0x%p ", mod->name, - mod->core_size, (void *)mod); + mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); #endif @@ -2031,7 +2031,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->module_core); + kdb_printf(" 0x%p", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { diff --git a/kernel/module.c b/kernel/module.c index 14b224967e7b..a0a3d6d9d5e8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -108,13 +108,6 @@ static LIST_HEAD(modules); * Use a latched RB-tree for __module_address(); this allows us to use * RCU-sched lookups of the address from any context. * - * Because modules have two address ranges: init and core, we need two - * latch_tree_nodes entries. Therefore we need the back-pointer from - * mod_tree_node. - * - * Because init ranges are short lived we mark them unlikely and have placed - * them outside the critical cacheline in struct module. - * * This is conditional on PERF_EVENTS || TRACING because those can really hit * __module_address() hard by doing a lot of stack unwinding; potentially from * NMI context. @@ -122,24 +115,16 @@ static LIST_HEAD(modules); static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->module_init; - - return (unsigned long)mod->module_core; + return (unsigned long)layout->base; } static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) { - struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); - struct module *mod = mtn->mod; + struct module_layout *layout = container_of(n, struct module_layout, mtn.node); - if (unlikely(mtn == &mod->mtn_init)) - return (unsigned long)mod->init_size; - - return (unsigned long)mod->core_size; + return (unsigned long)layout->size; } static __always_inline bool @@ -197,23 +182,23 @@ static void __mod_tree_remove(struct mod_tree_node *node) */ static void mod_tree_insert(struct module *mod) { - mod->mtn_core.mod = mod; - mod->mtn_init.mod = mod; + mod->core_layout.mtn.mod = mod; + mod->init_layout.mtn.mod = mod; - __mod_tree_insert(&mod->mtn_core); - if (mod->init_size) - __mod_tree_insert(&mod->mtn_init); + __mod_tree_insert(&mod->core_layout.mtn); + if (mod->init_layout.size) + __mod_tree_insert(&mod->init_layout.mtn); } static void mod_tree_remove_init(struct module *mod) { - if (mod->init_size) - __mod_tree_remove(&mod->mtn_init); + if (mod->init_layout.size) + __mod_tree_remove(&mod->init_layout.mtn); } static void mod_tree_remove(struct module *mod) { - __mod_tree_remove(&mod->mtn_core); + __mod_tree_remove(&mod->core_layout.mtn); mod_tree_remove_init(mod); } @@ -267,9 +252,9 @@ static void __mod_update_bounds(void *base, unsigned int size) static void mod_update_bounds(struct module *mod) { - __mod_update_bounds(mod->module_core, mod->core_size); - if (mod->init_size) - __mod_update_bounds(mod->module_init, mod->init_size); + __mod_update_bounds(mod->core_layout.base, mod->core_layout.size); + if (mod->init_layout.size) + __mod_update_bounds(mod->init_layout.base, mod->init_layout.size); } #ifdef CONFIG_KGDB_KDB @@ -1214,7 +1199,7 @@ struct module_attribute module_uevent = static ssize_t show_coresize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->core_size); + return sprintf(buffer, "%u\n", mk->mod->core_layout.size); } static struct module_attribute modinfo_coresize = @@ -1223,7 +1208,7 @@ static struct module_attribute modinfo_coresize = static ssize_t show_initsize(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%u\n", mk->mod->init_size); + return sprintf(buffer, "%u\n", mk->mod->init_layout.size); } static struct module_attribute modinfo_initsize = @@ -1917,29 +1902,29 @@ static void set_section_ro_nx(void *base, static void set_module_core_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_core, mod->core_text_size, - mod->core_ro_size, mod->core_size, + set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, + mod->core_layout.ro_size, mod->core_layout.size, set_memory_ro, set_memory_nx); } static void unset_module_core_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_core, mod->core_text_size, - mod->core_ro_size, mod->core_size, + set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, + mod->core_layout.ro_size, mod->core_layout.size, set_memory_rw, set_memory_x); } static void set_module_init_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_init, mod->init_text_size, - mod->init_ro_size, mod->init_size, + set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, + mod->init_layout.ro_size, mod->init_layout.size, set_memory_ro, set_memory_nx); } static void unset_module_init_ro_nx(struct module *mod) { - set_section_ro_nx(mod->module_init, mod->init_text_size, - mod->init_ro_size, mod->init_size, + set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, + mod->init_layout.ro_size, mod->init_layout.size, set_memory_rw, set_memory_x); } @@ -1952,14 +1937,14 @@ void set_all_modules_text_rw(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, + if ((mod->core_layout.base) && (mod->core_layout.text_size)) { + set_page_attributes(mod->core_layout.base, + mod->core_layout.base + mod->core_layout.text_size, set_memory_rw); } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, + if ((mod->init_layout.base) && (mod->init_layout.text_size)) { + set_page_attributes(mod->init_layout.base, + mod->init_layout.base + mod->init_layout.text_size, set_memory_rw); } } @@ -1975,14 +1960,14 @@ void set_all_modules_text_ro(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->module_core) && (mod->core_text_size)) { - set_page_attributes(mod->module_core, - mod->module_core + mod->core_text_size, + if ((mod->core_layout.base) && (mod->core_layout.text_size)) { + set_page_attributes(mod->core_layout.base, + mod->core_layout.base + mod->core_layout.text_size, set_memory_ro); } - if ((mod->module_init) && (mod->init_text_size)) { - set_page_attributes(mod->module_init, - mod->module_init + mod->init_text_size, + if ((mod->init_layout.base) && (mod->init_layout.text_size)) { + set_page_attributes(mod->init_layout.base, + mod->init_layout.base + mod->init_layout.text_size, set_memory_ro); } } @@ -2047,16 +2032,16 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); + module_memfree(mod->init_layout.base); kfree(mod->args); percpu_modfree(mod); /* Free lock-classes; relies on the preceding sync_rcu(). */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ unset_module_core_ro_nx(mod); - module_memfree(mod->module_core); + module_memfree(mod->core_layout.base); #ifdef CONFIG_MPU update_protections(current->mm); @@ -2259,20 +2244,20 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || strstarts(sname, ".init")) continue; - s->sh_entsize = get_offset(mod, &mod->core_size, s, i); + s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->core_size = debug_align(mod->core_size); - mod->core_text_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.text_size = mod->core_layout.size; break; case 1: /* RO: text and ro-data */ - mod->core_size = debug_align(mod->core_size); - mod->core_ro_size = mod->core_size; + mod->core_layout.size = debug_align(mod->core_layout.size); + mod->core_layout.ro_size = mod->core_layout.size; break; case 3: /* whole core */ - mod->core_size = debug_align(mod->core_size); + mod->core_layout.size = debug_align(mod->core_layout.size); break; } } @@ -2288,21 +2273,21 @@ static void layout_sections(struct module *mod, struct load_info *info) || s->sh_entsize != ~0UL || !strstarts(sname, ".init")) continue; - s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) + s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i) | INIT_OFFSET_MASK); pr_debug("\t%s\n", sname); } switch (m) { case 0: /* executable */ - mod->init_size = debug_align(mod->init_size); - mod->init_text_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.text_size = mod->init_layout.size; break; case 1: /* RO: text and ro-data */ - mod->init_size = debug_align(mod->init_size); - mod->init_ro_size = mod->init_size; + mod->init_layout.size = debug_align(mod->init_layout.size); + mod->init_layout.ro_size = mod->init_layout.size; break; case 3: /* whole init */ - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = debug_align(mod->init_layout.size); break; } } @@ -2477,7 +2462,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Put symbol section at end of init part of module. */ symsect->sh_flags |= SHF_ALLOC; - symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect, info->index.sym) | INIT_OFFSET_MASK; pr_debug("\t%s\n", info->secstrings + symsect->sh_name); @@ -2494,16 +2479,16 @@ static void layout_symtab(struct module *mod, struct load_info *info) } /* Append room for core symbols at end of core part. */ - info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); - info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); - mod->core_size += strtab_size; - mod->core_size = debug_align(mod->core_size); + info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1); + info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); + mod->core_layout.size += strtab_size; + mod->core_layout.size = debug_align(mod->core_layout.size); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; - strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect, info->index.str) | INIT_OFFSET_MASK; - mod->init_size = debug_align(mod->init_size); + mod->init_layout.size = debug_align(mod->init_layout.size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); } @@ -2524,8 +2509,8 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); - mod->core_symtab = dst = mod->module_core + info->symoffs; - mod->core_strtab = s = mod->module_core + info->stroffs; + mod->core_symtab = dst = mod->core_layout.base + info->symoffs; + mod->core_strtab = s = mod->core_layout.base + info->stroffs; src = mod->symtab; for (ndst = i = 0; i < mod->num_symtab; i++) { if (i == 0 || @@ -2975,7 +2960,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc(mod->core_size); + ptr = module_alloc(mod->core_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2985,11 +2970,11 @@ static int move_module(struct module *mod, struct load_info *info) if (!ptr) return -ENOMEM; - memset(ptr, 0, mod->core_size); - mod->module_core = ptr; + memset(ptr, 0, mod->core_layout.size); + mod->core_layout.base = ptr; - if (mod->init_size) { - ptr = module_alloc(mod->init_size); + if (mod->init_layout.size) { + ptr = module_alloc(mod->init_layout.size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -2998,13 +2983,13 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_memfree(mod->module_core); + module_memfree(mod->core_layout.base); return -ENOMEM; } - memset(ptr, 0, mod->init_size); - mod->module_init = ptr; + memset(ptr, 0, mod->init_layout.size); + mod->init_layout.base = ptr; } else - mod->module_init = NULL; + mod->init_layout.base = NULL; /* Transfer each section which specifies SHF_ALLOC */ pr_debug("final section addresses:\n"); @@ -3016,10 +3001,10 @@ static int move_module(struct module *mod, struct load_info *info) continue; if (shdr->sh_entsize & INIT_OFFSET_MASK) - dest = mod->module_init + dest = mod->init_layout.base + (shdr->sh_entsize & ~INIT_OFFSET_MASK); else - dest = mod->module_core + shdr->sh_entsize; + dest = mod->core_layout.base + shdr->sh_entsize; if (shdr->sh_type != SHT_NOBITS) memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); @@ -3081,12 +3066,12 @@ static void flush_module_icache(const struct module *mod) * Do it before processing of module parameters, so the module * can provide parameter accessor functions of its own. */ - if (mod->module_init) - flush_icache_range((unsigned long)mod->module_init, - (unsigned long)mod->module_init - + mod->init_size); - flush_icache_range((unsigned long)mod->module_core, - (unsigned long)mod->module_core + mod->core_size); + if (mod->init_layout.base) + flush_icache_range((unsigned long)mod->init_layout.base, + (unsigned long)mod->init_layout.base + + mod->init_layout.size); + flush_icache_range((unsigned long)mod->core_layout.base, + (unsigned long)mod->core_layout.base + mod->core_layout.size); set_fs(old_fs); } @@ -3144,8 +3129,8 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); - module_memfree(mod->module_init); - module_memfree(mod->module_core); + module_memfree(mod->init_layout.base); + module_memfree(mod->core_layout.base); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3232,7 +3217,7 @@ static noinline int do_init_module(struct module *mod) ret = -ENOMEM; goto fail; } - freeinit->module_init = mod->module_init; + freeinit->module_init = mod->init_layout.base; /* * We want to find out whether @mod uses async during init. Clear @@ -3292,10 +3277,10 @@ static noinline int do_init_module(struct module *mod) mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - mod->module_init = NULL; - mod->init_size = 0; - mod->init_ro_size = 0; - mod->init_text_size = 0; + mod->init_layout.base = NULL; + mod->init_layout.size = 0; + mod->init_layout.ro_size = 0; + mod->init_layout.text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we @@ -3575,7 +3560,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ - lockdep_free_key_range(mod->module_core, mod->core_size); + lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); module_deallocate(mod, info); free_copy: @@ -3653,9 +3638,9 @@ static const char *get_ksymbol(struct module *mod, /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) - nextval = (unsigned long)mod->module_init+mod->init_text_size; + nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size; else - nextval = (unsigned long)mod->module_core+mod->core_text_size; + nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size; /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ @@ -3902,7 +3887,7 @@ static int m_show(struct seq_file *m, void *p) return 0; seq_printf(m, "%s %u", - mod->name, mod->init_size + mod->core_size); + mod->name, mod->init_layout.size + mod->core_layout.size); print_unload_info(m, mod); /* Informative for users. */ @@ -3911,7 +3896,7 @@ static int m_show(struct seq_file *m, void *p) mod->state == MODULE_STATE_COMING ? "Loading" : "Live"); /* Used by oprofile and other similar tools. */ - seq_printf(m, " 0x%pK", mod->module_core); + seq_printf(m, " 0x%pK", mod->core_layout.base); /* Taints info */ if (mod->taints) @@ -4054,8 +4039,8 @@ struct module *__module_text_address(unsigned long addr) struct module *mod = __module_address(addr); if (mod) { /* Make sure it's within the text section. */ - if (!within(addr, mod->module_init, mod->init_text_size) - && !within(addr, mod->module_core, mod->core_text_size)) + if (!within(addr, mod->init_layout.base, mod->init_layout.text_size) + && !within(addr, mod->core_layout.base, mod->core_layout.text_size)) mod = NULL; } return mod; From 85c898db6327353d38f3dd428457384cf81f83f8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Nov 2015 09:45:08 +1030 Subject: [PATCH 290/291] module: clean up RO/NX handling. Modules have three sections: text, rodata and writable data. The code handled the case where these overlapped, however they never can: debug_align() ensures they are always page-aligned. This is why we got away with manually traversing the pages in set_all_modules_text_rw() without rounding. We create three helper functions: frob_text(), frob_rodata() and frob_writable_data(). We then call these explicitly at every point, so it's clear what we're doing. We also expose module_enable_ro() and module_disable_ro() for livepatch to use. Reviewed-by: Josh Poimboeuf Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- include/linux/module.h | 4 + kernel/module.c | 170 +++++++++++++++++++---------------------- 2 files changed, 82 insertions(+), 92 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 6e68e8cf4d0d..4560d8f1545d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -762,9 +762,13 @@ extern int module_sysfs_initialized; #ifdef CONFIG_DEBUG_SET_MODULE_RONX extern void set_all_modules_text_rw(void); extern void set_all_modules_text_ro(void); +extern void module_enable_ro(const struct module *mod); +extern void module_disable_ro(const struct module *mod); #else static inline void set_all_modules_text_rw(void) { } static inline void set_all_modules_text_ro(void) { } +static inline void module_enable_ro(const struct module *mod) { } +static inline void module_disable_ro(const struct module *mod) { } #endif #ifdef CONFIG_GENERIC_BUG diff --git a/kernel/module.c b/kernel/module.c index a0a3d6d9d5e8..77212128f34a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -80,15 +80,6 @@ # define debug_align(X) (X) #endif -/* - * Given BASE and SIZE this macro calculates the number of pages the - * memory regions occupies - */ -#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ - (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ - PFN_DOWN((unsigned long)BASE) + 1) \ - : (0UL)) - /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1858,74 +1849,75 @@ static void mod_sysfs_teardown(struct module *mod) /* * LKM RO/NX protection: protect module's text/ro-data * from modification and any data from execution. + * + * General layout of module is: + * [text] [read-only-data] [writable data] + * text_size -----^ ^ ^ + * ro_size ------------------------| | + * size -------------------------------------------| + * + * These values are always page-aligned (as is base) */ -void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +static void frob_text(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - unsigned long begin_pfn = PFN_DOWN((unsigned long)start); - unsigned long end_pfn = PFN_DOWN((unsigned long)end); - - if (end_pfn > begin_pfn) - set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base, + layout->text_size >> PAGE_SHIFT); } -static void set_section_ro_nx(void *base, - unsigned long text_size, - unsigned long ro_size, - unsigned long total_size, - int (*set_ro)(unsigned long start, int num_pages), - int (*set_nx)(unsigned long start, int num_pages)) +static void frob_rodata(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - /* begin and end PFNs of the current subsection */ - unsigned long begin_pfn; - unsigned long end_pfn; - - /* - * Set RO for module text and RO-data: - * - Always protect first page. - * - Do not protect last partial page. - */ - if (ro_size > 0) - set_page_attributes(base, base + ro_size, set_ro); - - /* - * Set NX permissions for module data: - * - Do not protect first partial page. - * - Always protect last page. - */ - if (total_size > text_size) { - begin_pfn = PFN_UP((unsigned long)base + text_size); - end_pfn = PFN_UP((unsigned long)base + total_size); - if (end_pfn > begin_pfn) - set_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); - } + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->text_size, + (layout->ro_size - layout->text_size) >> PAGE_SHIFT); } -static void set_module_core_ro_nx(struct module *mod) +static void frob_writable_data(const struct module_layout *layout, + int (*set_memory)(unsigned long start, int num_pages)) { - set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, - mod->core_layout.ro_size, mod->core_layout.size, - set_memory_ro, set_memory_nx); + BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); + BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1)); + set_memory((unsigned long)layout->base + layout->ro_size, + (layout->size - layout->ro_size) >> PAGE_SHIFT); } -static void unset_module_core_ro_nx(struct module *mod) +/* livepatching wants to disable read-only so it can frob module. */ +void module_disable_ro(const struct module *mod) { - set_section_ro_nx(mod->core_layout.base, mod->core_layout.text_size, - mod->core_layout.ro_size, mod->core_layout.size, - set_memory_rw, set_memory_x); + frob_text(&mod->core_layout, set_memory_rw); + frob_rodata(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); + frob_rodata(&mod->init_layout, set_memory_rw); } -static void set_module_init_ro_nx(struct module *mod) +void module_enable_ro(const struct module *mod) { - set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, - mod->init_layout.ro_size, mod->init_layout.size, - set_memory_ro, set_memory_nx); + frob_text(&mod->core_layout, set_memory_ro); + frob_rodata(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); + frob_rodata(&mod->init_layout, set_memory_ro); } -static void unset_module_init_ro_nx(struct module *mod) +static void module_enable_nx(const struct module *mod) { - set_section_ro_nx(mod->init_layout.base, mod->init_layout.text_size, - mod->init_layout.ro_size, mod->init_layout.size, - set_memory_rw, set_memory_x); + frob_rodata(&mod->core_layout, set_memory_nx); + frob_writable_data(&mod->core_layout, set_memory_nx); + frob_rodata(&mod->init_layout, set_memory_nx); + frob_writable_data(&mod->init_layout, set_memory_nx); +} + +static void module_disable_nx(const struct module *mod) +{ + frob_rodata(&mod->core_layout, set_memory_x); + frob_writable_data(&mod->core_layout, set_memory_x); + frob_rodata(&mod->init_layout, set_memory_x); + frob_writable_data(&mod->init_layout, set_memory_x); } /* Iterate through all modules and set each module's text as RW */ @@ -1937,16 +1929,9 @@ void set_all_modules_text_rw(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->core_layout.base) && (mod->core_layout.text_size)) { - set_page_attributes(mod->core_layout.base, - mod->core_layout.base + mod->core_layout.text_size, - set_memory_rw); - } - if ((mod->init_layout.base) && (mod->init_layout.text_size)) { - set_page_attributes(mod->init_layout.base, - mod->init_layout.base + mod->init_layout.text_size, - set_memory_rw); - } + + frob_text(&mod->core_layout, set_memory_rw); + frob_text(&mod->init_layout, set_memory_rw); } mutex_unlock(&module_mutex); } @@ -1960,24 +1945,25 @@ void set_all_modules_text_ro(void) list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; - if ((mod->core_layout.base) && (mod->core_layout.text_size)) { - set_page_attributes(mod->core_layout.base, - mod->core_layout.base + mod->core_layout.text_size, - set_memory_ro); - } - if ((mod->init_layout.base) && (mod->init_layout.text_size)) { - set_page_attributes(mod->init_layout.base, - mod->init_layout.base + mod->init_layout.text_size, - set_memory_ro); - } + + frob_text(&mod->core_layout, set_memory_ro); + frob_text(&mod->init_layout, set_memory_ro); } mutex_unlock(&module_mutex); } + +static void disable_ro_nx(const struct module_layout *layout) +{ + frob_text(layout, set_memory_rw); + frob_rodata(layout, set_memory_rw); + frob_rodata(layout, set_memory_x); + frob_writable_data(layout, set_memory_x); +} + #else -static void set_module_core_ro_nx(struct module *mod) { } -static void set_module_init_ro_nx(struct module *mod) { } -static void unset_module_core_ro_nx(struct module *mod) { } -static void unset_module_init_ro_nx(struct module *mod) { } +static void disable_ro_nx(const struct module_layout *layout) { } +static void module_enable_nx(const struct module *mod) { } +static void module_disable_nx(const struct module *mod) { } #endif void __weak module_memfree(void *module_region) @@ -2029,8 +2015,8 @@ static void free_module(struct module *mod) synchronize_sched(); mutex_unlock(&module_mutex); - /* This may be NULL, but that's OK */ - unset_module_init_ro_nx(mod); + /* This may be empty, but that's OK */ + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); kfree(mod->args); @@ -2040,7 +2026,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size); /* Finally, free the core (containing the module structure) */ - unset_module_core_ro_nx(mod); + disable_ro_nx(&mod->core_layout); module_memfree(mod->core_layout.base); #ifdef CONFIG_MPU @@ -3275,7 +3261,7 @@ static noinline int do_init_module(struct module *mod) mod->strtab = mod->core_strtab; #endif mod_tree_remove_init(mod); - unset_module_init_ro_nx(mod); + disable_ro_nx(&mod->init_layout); module_arch_freeing_init(mod); mod->init_layout.base = NULL; mod->init_layout.size = 0; @@ -3370,8 +3356,8 @@ static int complete_formation(struct module *mod, struct load_info *info) module_bug_finalize(info->hdr, info->sechdrs, mod); /* Set RO and NX regions */ - set_module_init_ro_nx(mod); - set_module_core_ro_nx(mod); + module_enable_ro(mod); + module_enable_nx(mod); /* Mark state as coming so strong_try_module_get() ignores us, * but kallsyms etc. can see us. */ @@ -3536,8 +3522,8 @@ static int load_module(struct load_info *info, const char __user *uargs, MODULE_STATE_GOING, mod); /* we can't deallocate the module until we clear memory protection */ - unset_module_init_ro_nx(mod); - unset_module_core_ro_nx(mod); + module_disable_ro(mod); + module_disable_nx(mod); ddebug_cleanup: dynamic_debug_remove(info->debug); From e0224418516b4d8a6c2160574bac18447c354ef0 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Thu, 26 Nov 2015 13:18:06 +1030 Subject: [PATCH 291/291] module: keep percpu symbols in module's symtab Currently, percpu symbols from .data..percpu ELF section of a module are not copied over and stored in final symtab array of struct module. Consequently such symbol cannot be returned via kallsyms API (for example kallsyms_lookup_name). This can be especially confusing when the percpu symbol is exported. Only its __ksymtab et al. are present in its symtab. The culprit is in layout_and_allocate() function where SHF_ALLOC flag is dropped for .data..percpu section. There is in fact no need to copy the section to final struct module, because kernel module loader allocates extra percpu section by itself. Unfortunately only symbols from SHF_ALLOC sections are copied due to a check in is_core_symbol(). The patch changes is_core_symbol() function to copy over also percpu symbols (their st_shndx points to .data..percpu ELF section). We do it only if CONFIG_KALLSYMS_ALL is set to be consistent with the rest of the function (ELF section is SHF_ALLOC but !SHF_EXECINSTR). Finally elf_type() returns type 'a' for a percpu symbol because its address is absolute. Signed-off-by: Miroslav Benes Signed-off-by: Rusty Russell Signed-off-by: Jiri Kosina --- kernel/module.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 77212128f34a..912e891e0e2f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2383,7 +2383,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } if (sym->st_shndx == SHN_UNDEF) return 'U'; - if (sym->st_shndx == SHN_ABS) + if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu) return 'a'; if (sym->st_shndx >= SHN_LORESERVE) return '?'; @@ -2412,7 +2412,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info) } static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, - unsigned int shnum) + unsigned int shnum, unsigned int pcpundx) { const Elf_Shdr *sec; @@ -2421,6 +2421,11 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, || !src->st_name) return false; +#ifdef CONFIG_KALLSYMS_ALL + if (src->st_shndx == pcpundx) + return true; +#endif + sec = sechdrs + src->st_shndx; if (!(sec->sh_flags & SHF_ALLOC) #ifndef CONFIG_KALLSYMS_ALL @@ -2458,7 +2463,8 @@ static void layout_symtab(struct module *mod, struct load_info *info) /* Compute total space required for the core symbols' strtab. */ for (ndst = i = 0; i < nsrc; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { strtab_size += strlen(&info->strtab[src[i].st_name])+1; ndst++; } @@ -2500,7 +2506,8 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) src = mod->symtab; for (ndst = i = 0; i < mod->num_symtab; i++) { if (i == 0 || - is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { + is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, + info->index.pcpu)) { dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_strtab; s += strlcpy(s, &mod->strtab[src[i].st_name],