genirq/irqchip fixes for 4.8-rc4

- A critical fix for chained irqchip where we failed to configure
   the cascade interrupt trigger
 - A GIC fix for self-IPI in SMP-on-UP configurations
 - A PM fix for GICv3
 - An initialization fix for the GICv3 ITS, triggered by kexec
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJXurN4AAoJECPQ0LrRPXpD9QMQAIqQV4zUVJURbVqalAQu3ysn
 tqrB7ZRd9bV4bvHSUCtsmNomQ1r3aa1gLrq1oRbfZW5Qorn9t86qvsmWSwmt33th
 cPM1/EBPoNWUcCb2KujLzeDklA7dC+VfZLBHd7n2MIPKAhhStNOY6E9el+95oc4m
 r8NXRNhaUMe8l9Ip6G2N2Gtj4ncVLr4Z/q6cIM9C8UkmsXSTfQvDlm2y/yye7bOo
 doBNJd+lrthEic3/UichoGPScKi65BxJDEl/oWZsvb7YHleHWfYfS2Gua2frxFWZ
 mvowObr3zSsiJh/KHPPpbMEezbMr6qY2gq4LsKq8ht9uG6xtU38GFjK0ihZ9EHn2
 iOAjvf0I6UPDl04qvVEp8cpAitNQsqdInOdW/oJ+CbJKPzc+gzE9GSBmcgd8Ut3I
 AeWEpuPnmj0VGHPHnL6LQPCHbqH4xMqxdSLaMlxi+xCbgpZ8yFkvnHhEfypxZA7l
 T1IgRUBYIR7ZTzGLmyD2mRlF6VWJHn1pnhsz4thZy677mON0hvWGvo6bbbEows63
 cZMtUL9S9kZW874RG6RLhhHWHt2uwZnNXVmHiOc1+YDs133LJnMLYDxYIj4/S24P
 aRtWVVa6IuPNbA3RJUmr0EpOAWiwW2lowV76cBSybmKadJeVI/dW7m2GsLRjO0id
 FMBX3nWl3ShaWBJ3nrjQ
 =vmKj
 -----END PGP SIGNATURE-----

Merge tag 'irqchip-for-4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms into irq/urgent

Pull genirq/irqchip fixes for 4.8-rc4 from Marc Zyngier:

- A critical fix for chained irqchip where we failed to configure
  the cascade interrupt trigger
- A GIC fix for self-IPI in SMP-on-UP configurations
- A PM fix for GICv3
- An initialization fix for the GICv3 ITS, triggered by kexec
Thomas Gleixner 2016-08-22 10:34:13 +02:00
commit d16c995fe4
246 changed files with 2445 additions and 1201 deletions


@ -14,6 +14,12 @@ add_random (RW)
This file allows to turn off the disk entropy contribution. Default
value of this file is '1'(on).
dax (RO)
--------
This file indicates whether the device supports Direct Access (DAX),
used by CPU-addressable storage to bypass the pagecache. It shows '1'
if true, '0' if not.
discard_granularity (RO)
-----------------------
This shows the size of internal allocation of the device in bytes, if
@ -46,6 +52,12 @@ hw_sector_size (RO)
-------------------
This is the hardware sector size of the device, in bytes.
io_poll (RW)
------------
When read, this file shows the total number of block IO polls and how
many returned success. Writing '0' to this file will disable polling
for this device. Writing any non-zero value will enable this feature.
iostats (RW)
-------------
This file is used to control (on/off) the iostats accounting of the
@ -151,5 +163,11 @@ device state. This means that it might not be safe to toggle the
setting from "write back" to "write through", since that will also
eliminate cache flushes issued by the kernel.
write_same_max_bytes (RO)
-------------------------
This is the number of bytes the device can write in a single write-same
command. A value of '0' means write-same is not supported by this
device.
Jens Axboe <jens.axboe@oracle.com>, February 2009
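
The attributes documented above are exposed as files under /sys/block/<device>/queue/. As an illustration only (not part of this commit), the following minimal userspace sketch checks the new dax and io_poll attributes; the device name "sda" and the helper read_queue_attr() are assumptions made for the example.

/*
 * Illustrative sketch, not kernel code from this series: read a queue
 * attribute such as "dax" or "io_poll" from sysfs and return its value.
 */
#include <stdio.h>
#include <stdlib.h>

static int read_queue_attr(const char *dev, const char *attr)
{
	char path[128], buf[16];
	FILE *f;
	int val;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", dev, attr);
	f = fopen(path, "r");
	if (!f)
		return -1;		/* attribute absent on older kernels */
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	val = atoi(buf);		/* '1' = supported/enabled, '0' = not */
	return val;
}

int main(void)
{
	printf("dax:     %d\n", read_queue_attr("sda", "dax"));
	printf("io_poll: %d\n", read_queue_attr("sda", "io_poll"));
	return 0;
}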


@ -1004,6 +1004,7 @@ N: meson
ARM/Annapurna Labs ALPINE ARCHITECTURE
M: Tsahee Zidenberg <tsahee@annapurnalabs.com>
M: Antoine Tenart <antoine.tenart@free-electrons.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-alpine/
F: arch/arm/boot/dts/alpine*


@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 8
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Psychotic Stoned Sheep
# *DOCUMENTATION*
@ -635,13 +635,6 @@ endif
# Tell gcc to never replace conditional load with a non-conditional one
KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
PHONY += gcc-plugins
gcc-plugins: scripts_basic
ifdef CONFIG_GCC_PLUGINS
$(Q)$(MAKE) $(build)=scripts/gcc-plugins
endif
@:
include scripts/Makefile.gcc-plugins
ifdef CONFIG_READABLE_ASM


@ -260,12 +260,14 @@ machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y))
platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
ifeq ($(KBUILD_SRC),)
KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
else
KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
endif
endif
endif
export TEXT_OFFSET GZFLAGS MMUEXT


@ -70,13 +70,12 @@
* associativity as these may be erroneously set
* up by boot loader(s).
*/
cache-size = <1048576>; // 1MB
cache-sets = <4096>;
cache-size = <131072>; // 128KB
cache-sets = <512>;
cache-line-size = <32>;
arm,parity-disable;
arm,tag-latency = <1>;
arm,data-latency = <1 1>;
arm,dirty-latency = <1>;
arm,tag-latency = <1 1 1>;
arm,data-latency = <1 1 1>;
};
scu: scu@1f000000 {


@ -42,7 +42,7 @@
};
syscon {
compatible = "arm,integrator-ap-syscon";
compatible = "arm,integrator-ap-syscon", "syscon";
reg = <0x11000000 0x100>;
interrupt-parent = <&pic>;
/* These are the logical module IRQs */


@ -94,7 +94,7 @@
};
syscon {
compatible = "arm,integrator-cp-syscon";
compatible = "arm,integrator-cp-syscon", "syscon";
reg = <0xcb000000 0x100>;
};


@ -70,14 +70,6 @@
cpu_on = <0x84000003>;
};
psci {
compatible = "arm,psci";
method = "smc";
cpu_suspend = <0x84000001>;
cpu_off = <0x84000002>;
cpu_on = <0x84000003>;
};
soc {
#address-cells = <1>;
#size-cells = <1>;


@ -1382,7 +1382,7 @@
* Pin 41: BR_UART1_TXD
* Pin 44: BR_UART1_RXD
*/
serial@70006000 {
serial@0,70006000 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};
@ -1394,7 +1394,7 @@
* Pin 71: UART2_CTS_L
* Pin 74: UART2_RTS_L
*/
serial@70006040 {
serial@0,70006040 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};


@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
CONFIG_PRINTK_TIME=1
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y


@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
CONFIG_PRINTK_TIME=1
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y


@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
mm_segment_t fs;
long ret, err, i;
if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
if (maxevents <= 0 ||
maxevents > (INT_MAX/sizeof(*kbuf)) ||
maxevents > (INT_MAX/sizeof(*events)))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
return -EFAULT;
kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
if (nsops < 1 || nsops > SEMOPM)
return -EINVAL;
if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
return -EFAULT;
sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
if (!sops)
return -ENOMEM;


@ -1009,9 +1009,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
int ret;
if (!vgic_present)
return -ENXIO;
return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
mutex_lock(&kvm->lock);
ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
mutex_unlock(&kvm->lock);
return ret;
}
case KVM_ARM_SET_DEVICE_ADDR: {
struct kvm_arm_device_addr dev_addr;


@ -1,13 +1,13 @@
menuconfig ARCH_CLPS711X
bool "Cirrus Logic EP721x/EP731x-based"
depends on ARCH_MULTI_V4T
select ARCH_REQUIRE_GPIOLIB
select AUTO_ZRELADDR
select CLKSRC_OF
select CLPS711X_TIMER
select COMMON_CLK
select CPU_ARM720T
select GENERIC_CLOCKEVENTS
select GPIOLIB
select MFD_SYSCON
select OF_IRQ
select USE_OF


@ -1,5 +1,4 @@
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-I$(srctree)/arch/arm/plat-orion/include
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
AFLAGS_coherency_ll.o := -Wa,-march=armv7-a
CFLAGS_pmsu.o := -march=armv7-a


@ -11,11 +11,13 @@ if ARCH_OXNAS
config MACH_OX810SE
bool "Support OX810SE Based Products"
select ARCH_HAS_RESET_CONTROLLER
select COMMON_CLK_OXNAS
select CPU_ARM926T
select MFD_SYSCON
select OXNAS_RPS_TIMER
select PINCTRL_OXNAS
select RESET_CONTROLLER
select RESET_OXNAS
select VERSATILE_FPGA_IRQ
help


@ -13,6 +13,7 @@
*/
#include <linux/kernel.h>
#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/major.h>


@ -13,6 +13,7 @@
*/
#include <linux/kernel.h>
#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/gpio_keys.h>


@ -1,8 +1,7 @@
#
# Makefile for the linux kernel.
#
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-I$(srctree)/arch/arm/plat-versatile/include
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
obj-y := core.o
obj-$(CONFIG_REALVIEW_DT) += realview-dt.o


@ -5,7 +5,7 @@
#
# Licensed under GPLv2
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
# Core


@ -40,5 +40,8 @@ bool shmobile_smp_cpu_can_disable(unsigned int cpu)
bool __init shmobile_smp_init_fallback_ops(void)
{
/* fallback on PSCI/smp_ops if no other DT based method is detected */
if (!IS_ENABLED(CONFIG_SMP))
return false;
return platform_can_secondary_boot() ? true : false;
}


@ -8,7 +8,7 @@ config ARCH_SUNXI
config ARCH_ALPINE
bool "Annapurna Labs Alpine platform"
select ALPINE_MSI
select ALPINE_MSI if PCI
help
This enables support for the Annapurna Labs Alpine
Soc family.
@ -66,7 +66,7 @@ config ARCH_LG1K
config ARCH_HISI
bool "Hisilicon SoC Family"
select ARM_TIMER_SP804
select HISILICON_IRQ_MBIGEN
select HISILICON_IRQ_MBIGEN if PCI
help
This enables support for Hisilicon ARMv8 SoC family


@ -12,6 +12,7 @@
/dts-v1/;
#include "exynos7.dtsi"
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/clock/samsung,s2mps11.h>
/ {
model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@ -43,6 +44,8 @@
&rtc {
status = "okay";
clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
clock-names = "rtc", "rtc_src";
};
&watchdog {


@ -1,4 +1,3 @@
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
@ -15,10 +14,14 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_HUGETLB=y
# CONFIG_UTS_NS is not set
# CONFIG_IPC_NS is not set
# CONFIG_NET_NS is not set
CONFIG_CPUSETS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
@ -71,6 +74,7 @@ CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
CONFIG_SECCOMP=y
CONFIG_XEN=y
CONFIG_KEXEC=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@ -84,10 +88,37 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
# CONFIG_IPV6 is not set
CONFIG_IPV6=m
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_BRIDGE=m
CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_VLAN_8021Q_MVRP=y
CONFIG_BPF_JIT=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
@ -103,6 +134,7 @@ CONFIG_MTD=y
CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
CONFIG_VIRTIO_BLK=y
CONFIG_SRAM=y
# CONFIG_SCSI_PROC_FS is not set
@ -120,7 +152,10 @@ CONFIG_SATA_SIL24=y
CONFIG_PATA_PLATFORM=y
CONFIG_PATA_OF_PLATFORM=y
CONFIG_NETDEVICES=y
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
CONFIG_TUN=y
CONFIG_VETH=m
CONFIG_VIRTIO_NET=y
CONFIG_AMD_XGBE=y
CONFIG_NET_XGENE=y
@ -350,12 +385,16 @@ CONFIG_EXYNOS_ADC=y
CONFIG_PWM_SAMSUNG=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS4_FS=y
CONFIG_FUSE_FS=y
CONFIG_CUSE=y
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
CONFIG_OVERLAY_FS=m
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y


@ -22,7 +22,6 @@
#define __ARCH_WANT_KPROBES_INSN_SLOT
#define MAX_INSN_SIZE 1
#define MAX_STACK_SIZE 128
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
@ -47,7 +46,6 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
struct kprobe_step_ctx ss_ctx;
struct pt_regs jprobe_saved_regs;
char jprobes_stack[MAX_STACK_SIZE];
};
void arch_remove_kprobe(struct kprobe *);


@ -353,6 +353,8 @@ el1_sync:
lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
b.eq el1_da
cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
@ -364,6 +366,11 @@ el1_sync:
cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
b.ge el1_dbg
b el1_inv
el1_ia:
/*
* Fall through to the Data abort case
*/
el1_da:
/*
* Data abort handling


@ -35,6 +35,7 @@
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
/*
@ -217,12 +218,22 @@ static int create_safe_exec_page(void *src_start, size_t length,
set_pte(pte, __pte(virt_to_phys((void *)dst) |
pgprot_val(PAGE_KERNEL_EXEC)));
/* Load our new page tables */
asm volatile("msr ttbr0_el1, %0;"
"isb;"
"tlbi vmalle1is;"
"dsb ish;"
"isb" : : "r"(virt_to_phys(pgd)));
/*
* Load our new page tables. A strict BBM approach requires that we
* ensure that TLBs are free of any entries that may overlap with the
* global mappings we are about to install.
*
* For a real hibernate/resume cycle TTBR0 currently points to a zero
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
* runtime services), while for a userspace-driven test_resume cycle it
* points to userspace page tables (and we must point it at a zero page
* ourselves). Elsewhere we only (un)install the idmap with preemption
* disabled, so T0SZ should be as required regardless.
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
write_sysreg(virt_to_phys(pgd), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
@ -393,6 +404,38 @@ int swsusp_arch_resume(void)
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!tmp_pg_dir) {
pr_err("Failed to allocate memory for temporary page tables.");
rc = -ENOMEM;
goto out;
}
rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
if (rc)
goto out;
/*
* Since we only copied the linear map, we need to find restore_pblist's
* linear map address.
*/
lm_restore_pblist = LMADDR(restore_pblist);
/*
* We need a zero page that is zero before & after resume in order to
* to break before make on the ttbr1 page tables.
*/
zero_page = (void *)get_safe_page(GFP_ATOMIC);
if (!zero_page) {
pr_err("Failed to allocate zero page.");
rc = -ENOMEM;
goto out;
}
/*
* Locate the exit code in the bottom-but-one page, so that *NULL
* still has disastrous affects.
@ -418,27 +461,6 @@ int swsusp_arch_resume(void)
*/
__flush_dcache_area(hibernate_exit, exit_size);
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!tmp_pg_dir) {
pr_err("Failed to allocate memory for temporary page tables.");
rc = -ENOMEM;
goto out;
}
rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
if (rc)
goto out;
/*
* Since we only copied the linear map, we need to find restore_pblist's
* linear map address.
*/
lm_restore_pblist = LMADDR(restore_pblist);
/*
* KASLR will cause the el2 vectors to be in a different location in
* the resumed kernel. Load hibernate's temporary copy into el2.
@ -453,12 +475,6 @@ int swsusp_arch_resume(void)
__hyp_set_vectors(el2_vectors);
}
/*
* We need a zero page that is zero before & after resume in order to
* to break before make on the ttbr1 page tables.
*/
zero_page = (void *)get_safe_page(GFP_ATOMIC);
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
resume_hdr.reenter_kernel, lm_restore_pblist,
resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));


@ -41,18 +41,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
static inline unsigned long min_stack_size(unsigned long addr)
{
unsigned long size;
if (on_irq_stack(addr, raw_smp_processor_id()))
size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
else
size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
}
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
@ -489,20 +477,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
long stack_ptr = kernel_stack_pointer(regs);
kcb->jprobe_saved_regs = *regs;
/*
* As Linus pointed out, gcc assumes that the callee
* owns the argument space and could overwrite it, e.g.
* tailcall optimization. So, to be absolutely safe
* we also save and restore enough stack bytes to cover
* the argument area.
* Since we can't be sure where in the stack frame "stacked"
* pass-by-value arguments are stored we just don't try to
* duplicate any of the stack. Do not use jprobes on functions that
* use more than 64 bytes (after padding each to an 8 byte boundary)
* of arguments, or pass individual arguments larger than 16 bytes.
*/
kasan_disable_current();
memcpy(kcb->jprobes_stack, (void *)stack_ptr,
min_stack_size(stack_ptr));
kasan_enable_current();
instruction_pointer_set(regs, (unsigned long) jp->entry);
preempt_disable();
@ -554,10 +537,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
}
unpause_graph_tracing();
*regs = kcb->jprobe_saved_regs;
kasan_disable_current();
memcpy((void *)stack_addr, kcb->jprobes_stack,
min_stack_size(stack_addr));
kasan_enable_current();
preempt_enable_no_resched();
return 1;
}


@ -661,9 +661,9 @@ void __init smp_init_cpus(void)
acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
acpi_parse_gic_cpu_interface, 0);
if (cpu_count > NR_CPUS)
pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n",
cpu_count, NR_CPUS);
if (cpu_count > nr_cpu_ids)
pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
cpu_count, nr_cpu_ids);
if (!bootcpu_valid) {
pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
@ -677,7 +677,7 @@ void __init smp_init_cpus(void)
* with entries in cpu_logical_map while initializing the cpus.
* If the cpu set-up fails, invalidate the cpu_logical_map entry.
*/
for (i = 1; i < NR_CPUS; i++) {
for (i = 1; i < nr_cpu_ids; i++) {
if (cpu_logical_map(i) != INVALID_HWID) {
if (smp_cpu_setup(i))
cpu_logical_map(i) = INVALID_HWID;


@ -153,6 +153,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
}
#endif
static bool is_el1_instruction_abort(unsigned int esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
/*
* The kernel tried to access some page that wasn't present.
*/
@ -161,8 +166,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
{
/*
* Are we prepared to handle this kernel fault?
* We are almost certainly not prepared to handle instruction faults.
*/
if (fixup_exception(regs))
if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
/*
@ -267,7 +273,8 @@ static inline bool is_permission_fault(unsigned int esr)
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
(ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
}
static bool is_el0_instruction_abort(unsigned int esr)
@ -312,6 +319,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
if (is_el1_instruction_abort(esr))
die("Attempting to execute userspace memory", regs, esr);
if (!search_exception_tables(regs->pc))
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}


@ -3,6 +3,8 @@
#ifdef __KERNEL__
#include <linux/types.h>
/* H8/300 internal I/O functions */
#define __raw_readb __raw_readb


@ -213,7 +213,6 @@ static inline int frame_extra_sizes(int f)
static inline void adjustformat(struct pt_regs *regs)
{
((struct switch_stack *)regs - 1)->a5 = current->mm->start_data;
/*
* set format byte to make stack appear modulo 4, which it will
* be when doing the rte


@ -390,7 +390,6 @@ void __init mem_init(void)
free_all_bootmem();
mem_init_print_info(NULL);
show_mem(0);
}
void free_initmem(void)


@ -1642,8 +1642,14 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
preempt_disable();
if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
__func__, va, vcpu, read_c0_entryhi());
er = EMULATE_FAIL;
preempt_enable();
goto done;
}
} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
int index;
@ -1680,12 +1686,18 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
run, vcpu);
preempt_enable();
goto dont_update_pc;
} else {
/*
* We fault an entry from the guest tlb to the
* shadow host TLB
*/
kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
}
/*
* We fault an entry from the guest tlb to the
* shadow host TLB
*/
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, va, index, vcpu,
read_c0_entryhi());
er = EMULATE_FAIL;
preempt_enable();
goto done;
}
}
} else {
@ -2659,7 +2671,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
* OK we have a Guest TLB entry, now inject it into the
* shadow host TLB
*/
kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, va, index, vcpu,
read_c0_entryhi());
er = EMULATE_FAIL;
}
}
}


@ -99,7 +99,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
}
gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
if (gfn >= kvm->arch.guest_pmap_npages) {
if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
gfn, badvaddr);
kvm_mips_dump_host_tlbs();
@ -138,35 +138,49 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
struct kvm *kvm = vcpu->kvm;
kvm_pfn_t pfn0, pfn1;
gfn_t gfn0, gfn1;
long tlb_lo[2];
int ret;
if ((tlb->tlb_hi & VPN2_MASK) == 0) {
pfn0 = 0;
pfn1 = 0;
} else {
if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
>> PAGE_SHIFT) < 0)
return -1;
tlb_lo[0] = tlb->tlb_lo[0];
tlb_lo[1] = tlb->tlb_lo[1];
if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
>> PAGE_SHIFT) < 0)
return -1;
/*
* The commpage address must not be mapped to anything else if the guest
* TLB contains entries nearby, or commpage accesses will break.
*/
if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
VPN2_MASK & (PAGE_MASK << 1)))
tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
pfn0 = kvm->arch.guest_pmap[
mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
pfn1 = kvm->arch.guest_pmap[
mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
if (gfn0 >= kvm->arch.guest_pmap_npages ||
gfn1 >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
__func__, gfn0, gfn1, tlb->tlb_hi);
kvm_mips_dump_guest_tlbs(vcpu);
return -1;
}
if (kvm_mips_map_page(kvm, gfn0) < 0)
return -1;
if (kvm_mips_map_page(kvm, gfn1) < 0)
return -1;
pfn0 = kvm->arch.guest_pmap[gfn0];
pfn1 = kvm->arch.guest_pmap[gfn1];
/* Get attributes from the Guest TLB */
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
(tlb->tlb_lo[0] & ENTRYLO_D) |
(tlb->tlb_lo[0] & ENTRYLO_V);
(tlb_lo[0] & ENTRYLO_D) |
(tlb_lo[0] & ENTRYLO_V);
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
(tlb->tlb_lo[1] & ENTRYLO_D) |
(tlb->tlb_lo[1] & ENTRYLO_V);
(tlb_lo[1] & ENTRYLO_D) |
(tlb_lo[1] & ENTRYLO_V);
kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
tlb->tlb_lo[0], tlb->tlb_lo[1]);
@ -354,9 +368,15 @@ u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu)
local_irq_restore(flags);
return KVM_INVALID_INST;
}
kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
&vcpu->arch.
guest_tlb[index]);
if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
&vcpu->arch.guest_tlb[index])) {
kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
__func__, opc, index, vcpu,
read_c0_entryhi());
kvm_mips_dump_guest_tlbs(vcpu);
local_irq_restore(flags);
return KVM_INVALID_INST;
}
inst = *(opc);
}
local_irq_restore(flags);


@ -66,29 +66,28 @@ endif
UTS_MACHINE := $(OLDARCH)
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
override CC += -mlittle-endian
ifneq ($(cc-name),clang)
override CC += -mno-strict-align
endif
override AS += -mlittle-endian
override LD += -EL
override CROSS32CC += -mlittle-endian
override CROSS32AS += -mlittle-endian
LDEMULATION := lppc
GNUTARGET := powerpcle
MULTIPLEWORD := -mno-multiple
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
else
ifeq ($(call cc-option-yn,-mbig-endian),y)
override CC += -mbig-endian
override AS += -mbig-endian
endif
override LD += -EB
LDEMULATION := ppc
GNUTARGET := powerpc
MULTIPLEWORD := -mmultiple
endif
cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
ifneq ($(cc-name),clang)
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
endif
aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
ifeq ($(HAS_BIARCH),y)
override AS += -a$(CONFIG_WORD_SIZE)
override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
KBUILD_AFLAGS += $(aflags-y)
KBUILD_CFLAGS += $(cflags-y)
head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o


@ -4,6 +4,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/switch_to.h>
#define CHKSUM_BLOCK_SIZE 1
@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void)
crypto_unregister_shash(&alg);
}
module_init(crc32c_vpmsum_mod_init);
module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
module_exit(crc32c_vpmsum_mod_fini);
MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");


@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state;
#endif
/* Idle state entry routines */
#ifdef CONFIG_PPC_P7_NAP
#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
std r0,0(r1); \
ptesync; \
ld r0,0(r1); \
1: cmp cr0,r0,r0; \
bne 1b; \
IDLE_INST; \
b .
#endif /* CONFIG_PPC_P7_NAP */
#endif


@ -186,6 +186,7 @@ label##3: \
#ifndef __ASSEMBLY__
void apply_feature_fixups(void);
void setup_feature_keys(void);
#endif
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */


@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void)
static inline void __giveup_spe(struct task_struct *t) { }
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
extern void flush_tmregs_to_thread(struct task_struct *);
#else
static inline void flush_tmregs_to_thread(struct task_struct *t)
{
}
#endif
static inline void clear_task_ebb(struct task_struct *t)
{
#ifdef CONFIG_PPC_BOOK3S_64


@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void);
extern void xics_kexec_teardown_cpu(int secondary);
extern void xics_migrate_irqs_away(void);
extern void icp_native_eoi(struct irq_data *d);
extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
extern int xics_retrigger(struct irq_data *data);
#ifdef CONFIG_SMP
extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
unsigned int strict_check);


@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
int n = 0, l = 0;
char buffer[128];
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));


@ -144,29 +144,14 @@ machine_check_pSeries_1:
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
/* Running native on arch 2.06 or later, check if we are
* waking up from nap. We only handle no state loss and
* supervisor state loss. We do -not- handle hypervisor
* state loss at this time.
/*
* Running native on arch 2.06 or later, we may wakeup from winkle
* inside machine check. If yes, then last bit of HSPGR0 would be set
* to 1. Hence clear it unconditionally.
*/
mfspr r13,SPRN_SRR1
rlwinm. r13,r13,47-31,30,31
OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
beq 9f
mfspr r13,SPRN_SRR1
rlwinm. r13,r13,47-31,30,31
/* waking up from powersave (nap) state */
cmpwi cr1,r13,2
/* Total loss of HV state is fatal. let's just stay stuck here */
OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
bgt cr1,.
9:
OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
GET_PACA(r13)
clrrdi r13,r13,1
SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
@ -1273,25 +1258,51 @@ machine_check_handle_early:
* Check if thread was in power saving mode. We come here when any
* of the following is true:
* a. thread wasn't in power saving mode
* b. thread was in power saving mode with no state loss or
* supervisor state loss
* b. thread was in power saving mode with no state loss,
* supervisor state loss or hypervisor state loss.
*
* Go back to nap again if (b) is true.
* Go back to nap/sleep/winkle mode again if (b) is true.
*/
rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
beq 4f /* No, it wasn;t */
/* Thread was in power saving mode. Go back to nap again. */
cmpwi r11,2
bne 3f
/* Supervisor state loss */
blt 3f
/* Supervisor/Hypervisor state loss */
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
3: bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
li r3,PNV_THREAD_NAP
b pnv_enter_arch207_idle_mode
/*
* Check what idle state this CPU was in and go back to same mode
* again.
*/
lbz r3,PACA_THREAD_IDLE_STATE(r13)
cmpwi r3,PNV_THREAD_NAP
bgt 10f
IDLE_STATE_ENTER_SEQ(PPC_NAP)
/* No return */
10:
cmpwi r3,PNV_THREAD_SLEEP
bgt 2f
IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
/* No return */
2:
/*
* Go back to winkle. Please note that this thread was woken up in
* machine check from winkle and have not restored the per-subcore
* state. Hence before going back to winkle, set last bit of HSPGR0
* to 1. This will make sure that if this thread gets woken up
* again at reset vector 0x100 then it will get chance to restore
* the subcore state.
*/
ori r13,r13,1
SET_PACA(r13)
IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
/* No return */
4:
#endif
/*


@ -44,18 +44,6 @@
PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
PSSCR_MTL_MASK
/* Idle state entry routines */
#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
std r0,0(r1); \
ptesync; \
ld r0,0(r1); \
1: cmp cr0,r0,r0; \
bne 1b; \
IDLE_INST; \
b .
.text
/*
@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop)
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
_GLOBAL(pnv_restore_hyp_resource)
ld r2,PACATOC(r13);
BEGIN_FTR_SECTION
ld r2,PACATOC(r13);
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*/
clrldi r5,r13,63
clrrdi r13,r13,1
/* Now that we are sure r13 is corrected, load TOC */
ld r2,PACATOC(r13);
cmpwi cr4,r5,1
mtspr SPRN_HSPRG0,r13


@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->in_use = 1;
mce->initiator = MCE_INITIATOR_CPU;
if (handled)
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;


@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops);
static int get_phb_number(struct device_node *dn)
{
int ret, phb_id = -1;
u32 prop_32;
u64 prop;
/*
@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn)
* reading "ibm,opal-phbid", only present in OPAL environment.
*/
ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
if (ret)
ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
if (ret) {
ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
prop = prop_32;
}
if (!ret)
phb_id = (int)(prop & (MAX_PHBS - 1));


@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
#endif
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void flush_tmregs_to_thread(struct task_struct *tsk)
{
/*
* Process self tracing is not yet supported through
* ptrace interface. Ptrace generic code should have
* prevented this from happening in the first place.
* Warn once here with the message, if some how it
* is attempted.
*/
WARN_ONCE(tsk == current,
"Not expecting ptrace on self: TM regs may be incorrect\n");
/*
* If task is not current, it should have been flushed
* already to it's thread_struct during __switch_to().
*/
}
#endif
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{


@ -2940,7 +2940,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/* Don't print anything after quiesce under OPAL, it crashes OFW */
if (of_platform != PLATFORM_OPAL) {
prom_printf("Booting Linux via __start() ...\n");
prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%x\n", hdr);
}


@ -38,6 +38,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_END,
};
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static void flush_tmregs_to_thread(struct task_struct *tsk)
{
/*
* If task is not current, it will have been flushed already to
* it's thread_struct during __switch_to().
*
* A reclaim flushes ALL the state.
*/
if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
tm_reclaim_current(TM_CAUSE_SIGNAL);
}
#else
static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
#endif
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register


@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
*
* Find out what kind of machine we're on and save any data we need
* from the early boot process (devtree is copied on pmac by prom_init()).
* This is called very early on the boot process, after a minimal
* MMU environment has been set up but before MMU_init is called.
* We do the initial parsing of the flat device-tree and prepares
* for the MMU to be fully initialized.
*/
extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();


@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();


@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/memblock.h>
#include <asm/cpu_has_feature.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>


@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso32ld)
# strip rule for the .so file
@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# assembly rules for the .S files
$(obj-vdso32): %.o: %.S
$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<


@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
$(call if_changed,vdso64ld)
# strip rule for the .so file
@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# assembly rules for the .S files
$(obj-vdso64): %.o: %.S
$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<


@ -1329,20 +1329,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
xics->kvm = kvm;
/* Already there ? */
mutex_lock(&kvm->lock);
if (kvm->arch.xics)
ret = -EEXIST;
else
kvm->arch.xics = xics;
mutex_unlock(&kvm->lock);
if (ret) {
kfree(xics);
return ret;
}
xics_debugfs_init(xics);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
/* Enable real mode support */
@ -1354,9 +1350,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
return 0;
}
static void kvmppc_xics_init(struct kvm_device *dev)
{
struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
xics_debugfs_init(xics);
}
struct kvm_device_ops kvm_xics_ops = {
.name = "kvm-xics",
.create = kvmppc_xics_create,
.init = kvmppc_xics_init,
.destroy = kvmppc_xics_free,
.set_attr = xics_set_attr,
.get_attr = xics_get_attr,


@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic)
stw r7,12(r1)
stw r8,8(r1)
andi. r0,r4,1 /* is destination address even ? */
cmplwi cr7,r0,0
rlwinm r0,r4,3,0x8
rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */
cmplwi cr7,r0,0 /* is destination address even ? */
addic r12,r6,0
addi r6,r4,-4
neg r0,r4
@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic)
66: addze r3,r12
addi r1,r1,16
beqlr+ cr7
rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */
rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
/* read fault */


@ -188,7 +188,10 @@ void __init apply_feature_fixups(void)
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
do_final_fixups();
}
void __init setup_feature_keys(void)
{
/*
* Initialise jump label. This causes all the cpu/mmu_has_feature()
* checks to take on their correct polarity based on the current set of


@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
gang = alloc_spu_gang();
SPUFS_I(inode)->i_ctx = NULL;
SPUFS_I(inode)->i_gang = gang;
if (!gang)
if (!gang) {
ret = -ENOMEM;
goto out_iput;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;


@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
dev->dev.archdata.dma_ops = &dma_direct_ops;
/*
* Set the coherent DMA mask to prevent the iommu
* being used unnecessarily
*/
dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
return;
}
#endif


@ -228,7 +228,8 @@ int __init opal_event_init(void)
}
/* Install interrupt handler */
rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
"opal", NULL);
if (rc) {
irq_dispose_mapping(virq);
pr_warn("Error %d requesting irq %d (0x%x)\n",


@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs,
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */


@ -111,10 +111,17 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
/*
* WARNING: We cannot rely on the resource flags. The Linux PCI
* allocation code sometimes decides to put a 64-bit prefetchable
* BAR in the 32-bit window, so we have to compare the addresses.
*
* For simplicity we only test resource start.
*/
return (r->start >= phb->ioda.m64_base &&
r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
}
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
sgsz = phb->ioda.m64_segsize;
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
r = &pdev->resource[i];
if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
start = _ALIGN_DOWN(r->start - base, sgsz);
@ -1877,7 +1884,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
unsigned shift, unsigned long index,
unsigned long npages)
{
__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
if (!pnv_pci_is_mem_pref_64(res->flags)) {
if (!pnv_pci_is_m64(phb, res)) {
dev_warn(&pdev->dev, "Don't support SR-IOV with"
" non M64 VF BAR%d: %pR. \n",
i, res);
@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
index++;
}
} else if ((res->flags & IORESOURCE_MEM) &&
!pnv_pci_is_mem_pref_64(res->flags)) {
!pnv_pci_is_m64(phb, res)) {
region.start = res->start -
phb->hose->mem_offset[0] -
phb->ioda.m32_pci_base;
@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
bridge = bridge->bus->self;
}
/* We fail back to M32 if M64 isn't supported */
if (phb->ioda.m64_segsize &&
pnv_pci_is_mem_pref_64(type))
/*
* We fall back to M32 if M64 isn't supported. We enforce the M64
* alignment for any 64-bit resource, PCIe doesn't care and
* bridges only do 64-bit prefetchable anyway.
*/
if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
w = NULL;
if (r->flags & type & IORESOURCE_IO)
w = &hose->io_resource;
else if (pnv_pci_is_mem_pref_64(r->flags) &&
else if (pnv_pci_is_m64(phb, r) &&
(type & IORESOURCE_PREFETCH) &&
phb->ioda.m64_segsize)
w = &hose->mem_resources[1];


@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
return dlpar_update_device_tree_lmb(lmb);
}
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
{
unsigned long section_nr;
struct mem_section *mem_sect;
struct memory_block *mem_block;
section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
mem_sect = __nr_to_section(section_nr);
mem_block = find_memory_block(mem_sect);
return mem_block;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
static int dlpar_add_lmb(struct of_drconf_cell *);
static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
{
unsigned long section_nr;
struct mem_section *mem_sect;
struct memory_block *mem_block;
section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
mem_sect = __nr_to_section(section_nr);
mem_block = find_memory_block(mem_sect);
return mem_block;
}
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;


@ -1,6 +1,7 @@
config PPC_XICS
def_bool n
select PPC_SMP_MUXED_IPI
select HARDIRQS_SW_RESEND
config PPC_ICP_NATIVE
def_bool n


@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = {
.irq_mask = ics_opal_mask_irq,
.irq_unmask = ics_opal_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
.irq_set_affinity = ics_opal_set_affinity
.irq_set_affinity = ics_opal_set_affinity,
.irq_set_type = xics_set_irq_type,
.irq_retrigger = xics_retrigger,
};
static int ics_opal_map(struct ics *ics, unsigned int virq);


@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = {
.irq_mask = ics_rtas_mask_irq,
.irq_unmask = ics_rtas_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
.irq_set_affinity = ics_rtas_set_affinity
.irq_set_affinity = ics_rtas_set_affinity,
.irq_set_type = xics_set_irq_type,
.irq_retrigger = xics_retrigger,
};
static int ics_rtas_map(struct ics *ics, unsigned int virq)


@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
/* They aren't all level sensitive but we just don't really know */
irq_set_status_flags(virq, IRQ_LEVEL);
/*
* Mark interrupts as edge sensitive by default so that resend
* actually works. The device-tree parsing will turn the LSIs
* back to level.
*/
irq_clear_status_flags(virq, IRQ_LEVEL);
/* Don't call into ICS for IPIs */
if (hw == XICS_IPI) {
@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
/* Current xics implementation translates everything
* to level. It is not technically right for MSIs but this
* is irrelevant at this point. We might get smarter in the future
*/
*out_hwirq = intspec[0];
*out_flags = IRQ_TYPE_LEVEL_LOW;
/*
* If intsize is at least 2, we look for the type in the second cell,
* we assume the LSB indicates a level interrupt.
*/
if (intsize > 1) {
if (intspec[1] & 1)
*out_flags = IRQ_TYPE_LEVEL_LOW;
else
*out_flags = IRQ_TYPE_EDGE_RISING;
} else
*out_flags = IRQ_TYPE_LEVEL_LOW;
return 0;
}
int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
{
/*
* We only support these. This has really no effect other than setting
* the corresponding descriptor bits mind you but those will in turn
* affect the resend function when re-enabling an edge interrupt.
*
* Set set the default to edge as explained in map().
*/
if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
flow_type = IRQ_TYPE_EDGE_RISING;
if (flow_type != IRQ_TYPE_EDGE_RISING &&
flow_type != IRQ_TYPE_LEVEL_LOW)
return -EINVAL;
irqd_set_trigger_type(d, flow_type);
return IRQ_SET_MASK_OK_NOCOPY;
}
int xics_retrigger(struct irq_data *data)
{
/*
* We need to push a dummy CPPR when retriggering, since the subsequent
* EOI will try to pop it. Passing 0 works, as the function hard codes
* the priority value anyway.
*/
xics_push_cppr(0);
/* Tell the core to do a soft retrigger */
return 0;
}


@ -872,4 +872,17 @@ config S390_GUEST
Select this option if you want to run the kernel as a guest under
the KVM hypervisor.
config S390_GUEST_OLD_TRANSPORT
def_bool y
prompt "Guest support for old s390 virtio transport (DEPRECATED)"
depends on S390_GUEST
help
Enable this option to add support for the old s390-virtio
transport (i.e. virtio devices NOT based on virtio-ccw). This
type of virtio devices is only available on the experimental
kuli userspace or with old (< 2.6) qemu. If you are running
with a modern version of qemu (which supports virtio-ccw since
1.4 and uses it by default since version 2.4), you probably won't
need this.
endmenu


@ -1672,6 +1672,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
@ -2361,8 +2362,10 @@ retry:
rc = gmap_mprotect_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),
PAGE_SIZE * 2, PROT_WRITE);
if (rc)
if (rc) {
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
return rc;
}
goto retry;
}


@ -98,7 +98,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool foreign)
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;


@ -5,6 +5,8 @@
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
CFLAGS_syscall_64.o += -Wno-override-init
CFLAGS_syscall_32.o += -Wno-override-init
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o


@ -288,11 +288,15 @@ return_from_SYSCALL_64:
jne opportunistic_sysret_failed
/*
* SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
* restoring TF results in a trap from userspace immediately after
* SYSRET. This would cause an infinite loop whenever #DB happens
* with register state that satisfies the opportunistic SYSRET
* conditions. For example, single-stepping this user code:
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
* restore RF properly. If the slowpath sets it for whatever reason, we
* need to restore it correctly.
*
* SYSRET can restore TF, but unlike IRET, restoring TF results in a
* trap from userspace immediately after SYSRET. This would cause an
* infinite loop whenever #DB happens with register state that satisfies
* the opportunistic SYSRET conditions. For example, single-stepping
* this user code:
*
* movq $stuck_here, %rcx
* pushfq
@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
.endm
#endif
/* Make sure APIC interrupt handlers end up in the irqentry section: */
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
# define POP_SECTION_IRQENTRY .popsection
#else
# define PUSH_SECTION_IRQENTRY
# define POP_SECTION_IRQENTRY
#endif
.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
POP_SECTION_IRQENTRY
.endm
#ifdef CONFIG_SMP

View File

@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
}
static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
.enable_box = snb_uncore_msr_enable_box,
.exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}
static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
static struct intel_uncore_ops skl_uncore_msr_ops = {
.init_box = skl_uncore_msr_init_box,
.enable_box = skl_uncore_msr_enable_box,
.exit_box = skl_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,


@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
static struct intel_uncore_type hswep_uncore_ha = {
.name = "ha",
.num_counters = 5,
.num_counters = 4,
.num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
static struct intel_uncore_type hswep_uncore_imc = {
.name = "imc",
.num_counters = 5,
.num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
static struct intel_uncore_type hswep_uncore_qpi = {
.name = "qpi",
.num_counters = 5,
.num_counters = 4,
.num_boxes = 3,
.perf_ctr_bits = 48,
.perf_ctr = SNBEP_PCI_PMON_CTR0,
@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
static struct intel_uncore_type hswep_uncore_r3qpi = {
.name = "r3qpi",
.num_counters = 4,
.num_counters = 3,
.num_boxes = 3,
.perf_ctr_bits = 44,
.constraints = hswep_uncore_r3qpi_constraints,
@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
.num_counters = 5,
.num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,


@ -135,6 +135,7 @@ extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern void lapic_update_tsc_freq(void);
extern int APIC_init_uniprocessor(void);
#ifdef CONFIG_X86_64
@ -170,6 +171,7 @@ static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC


@ -22,10 +22,6 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
/*
* irq_tlb_count is double-counted in irq_call_count, so it must be
* subtracted from irq_call_count when displaying irq_call_count
*/
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR


@ -5,10 +5,10 @@ struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long pmd_flag; /* page flag for PMD entry */
bool kernel_mapping; /* kernel mapping or ident mapping */
unsigned long offset; /* ident mapping offset */
};
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long addr, unsigned long end);
unsigned long pstart, unsigned long pend);
#endif /* _ASM_X86_INIT_H */

View File

@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
* | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
* | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
#define SWP_TYPE_BITS 5
/* Place the offset above the type: */
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)

View File

@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
extern unsigned char secondary_startup_64[];
#endif
static inline size_t real_mode_size_needed(void)
{
if (real_mode_header)
return 0; /* already allocated. */
return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
}
void set_real_mode_mem(phys_addr_t mem, size_t size);
void reserve_real_mode(void);
void setup_real_mode(void);
#endif /* _ARCH_X86_REALMODE_H */

View File

@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
static inline void __native_flush_tlb(void)
{
/*
* If current->mm == NULL then we borrow a mm which may change during a
* task switch and therefore we must not be preempted while we write CR3
* back:
*/
preempt_disable();
native_write_cr3(native_read_cr3());
preempt_enable();
}
static inline void __native_flush_tlb_global_irq_disabled(void)

View File

@ -79,7 +79,7 @@ struct uv_gam_range_entry {
u16 nasid; /* HNasid */
u16 sockid; /* Socket ID, high bits of APIC ID */
u16 pnode; /* Index to MMR and GRU spaces */
u32 pxm; /* ACPI proximity domain number */
u32 unused2;
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};
@ -88,7 +88,8 @@ struct uv_gam_range_entry {
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */

View File

@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
#define TSC_DIVISOR 32
#define TSC_DIVISOR 8
/*
* This function sets up the local APIC timer, with a timeout of
@ -565,12 +565,36 @@ static void setup_APIC_timer(void)
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
(tsc_khz / TSC_DIVISOR) * 1000,
tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
} else
clockevents_register_device(levt);
}
/*
* Install the updated TSC frequency from recalibration at the TSC
* deadline clockevent devices.
*/
static void __lapic_update_tsc_freq(void *info)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return;
clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
}
void lapic_update_tsc_freq(void)
{
/*
* The clockevent device's ->mult and ->shift can both be
* changed. In order to avoid races, schedule the frequency
* update code on each CPU.
*/
on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}
/*
* In this functions we calibrate APIC bus clocks to the external timer.
*

View File

@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
int x2apic_prepare_cpu(unsigned int cpu)
static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
return 0;
}
int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;
@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
int ret;
if (!x2apic_mode)
return 0;
ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
if (ret < 0) {
pr_err("Failed to register X2APIC_PREPARE\n");
return 0;
}
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}

View File

@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
if (numa_off) {
pr_err("UV: NUMA is off, disabling UV support\n");
return 0;
}
/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
int bytes, i, sid, lsid = -1;
int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
if (!gre)
return;
@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
grt = &_gr_table[sid];
grt->base = lsid;
grt = &_gr_table[indx];
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
grt->base = sid - 1;
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
"SID", "PN", "PXM");
"SID", "PN");
}
pr_info(
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
gre->type, gre->nasid, gre->sockid,
gre->pnode, gre->pxm);
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
if (sock_min > gre->sockid)
@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
_pnode_to_socket[i] = SOCK_EMPTY;
/* fill in pnode/node/addr conversion list values */
pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
_socket_to_node[i] = gre->pxm;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
pr_info(
"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
_socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
/* check socket -> node values */
/* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
i = sockid - minsock;
if (nid != _socket_to_node[i]) {
pr_warn(
"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
i, sockid, gre->type, _socket_to_node[i], nid);
_socket_to_node[i] = nid;
}
_socket_to_node[sockid - minsock] = nid;
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
sockid, apicid, nid);
}
/* Setup physical blade to pnode translation from GAM Range Table */

View File

@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
/*
* Set xfeatures (aka XSTATE_BV) bit for a feature that we want
* to take out of its "init state". This will ensure that an
* XRSTOR actually restores the state.
*/
static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
int xstate_feature_mask)
{
xsave->header.xfeatures |= xstate_feature_mask;
}
/*
* This function is safe to call whether the FPU is in use or not.
*
* Note that this only works on the current task.
*
* Inputs:
* @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
* XFEATURE_MASK_SSE, etc...)
* @xsave_state_ptr: a pointer to a copy of the state that you would
* like written in to the current task's FPU xsave state. This pointer
* must not be located in the current tasks's xsave area.
* Output:
* address of the state in the xsave area or NULL if the state
* is not present or is in its 'init state'.
*/
static void fpu__xfeature_set_state(int xstate_feature_mask,
void *xstate_feature_src, size_t len)
{
struct xregs_state *xsave = &current->thread.fpu.state.xsave;
struct fpu *fpu = &current->thread.fpu;
void *dst;
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
return;
}
/*
* Tell the FPU code that we need the FPU state to be in
* 'fpu' (not in the registers), and that we need it to
* be stable while we write to it.
*/
fpu__current_fpstate_write_begin();
/*
* This method *WILL* *NOT* work for compact-format
* buffers. If the 'xstate_feature_mask' is unset in
* xcomp_bv then we may need to move other feature state
* "up" in the buffer.
*/
if (xsave->header.xcomp_bv & xstate_feature_mask) {
WARN_ON_ONCE(1);
goto out;
}
/* find the location in the xsave buffer of the desired state */
dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
/*
* Make sure that the pointer being passed in did not
* come from the xsave buffer itself.
*/
WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
/* put the caller-provided data in the location */
memcpy(dst, xstate_feature_src, len);
/*
* Mark the xfeature so that the CPU knows there is state
* in the buffer now.
*/
fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
out:
/*
* We are done writing to the 'fpu'. Re-enable preemption
* and (possibly) move the fpstate back into the fpregs.
*/
fpu__current_fpstate_write_end();
}
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
* This will go out and modify the XSAVE buffer so that PKRU is
* set to a particular state for access to 'pkey'.
*
* PKRU state does affect kernel access to user memory. We do
* not modify PKRU *itself* here, only the XSAVE state that will
* be restored into PKRU when we return back to userspace.
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
*/
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
struct pkru_state *old_pkru_state;
struct pkru_state new_pkru_state;
u32 old_pkru;
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
u32 new_pkru_bits = 0;
@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
/*
* For most XSAVE components, this would be an arduous task:
* bringing fpstate up to date with fpregs, updating fpstate,
* then re-populating fpregs. But, for components that are
* never lazily managed, we can just access the fpregs
* directly. PKRU is never managed lazily, so we can just
* manipulate it directly. Make sure it stays that way.
*/
WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
/* Shift the bits in to the correct place in PKRU for pkey: */
new_pkru_bits <<= pkey_shift;
/* Locate old copy of the state in the xsave buffer: */
old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
/* Get old PKRU and mask off any old bits in place: */
old_pkru = read_pkru();
old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
/*
* When state is not in the buffer, it is in the init
* state, set it manually. Otherwise, copy out the old
* state.
*/
if (!old_pkru_state)
new_pkru_state.pkru = 0;
else
new_pkru_state.pkru = old_pkru_state->pkru;
/* Mask off any old bits in place: */
new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
/* Set the newly-requested bits: */
new_pkru_state.pkru |= new_pkru_bits;
/*
* We could theoretically live without zeroing pkru.pad.
* The current XSAVE feature state definition says that
* only bytes 0->3 are used. But we do not want to
* chance leaking kernel stack out to userspace in case a
* memcpy() of the whole xsave buffer was done.
*
* They're in the same cacheline anyway.
*/
new_pkru_state.pad = 0;
fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
/* Write old part along with new part: */
write_pkru(old_pkru | new_pkru_bits);
return 0;
}

View File

@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
/* Initialize 32bit specific setup functions */
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
reserve_bios_regions();
}
asmlinkage __visible void __init i386_start_kernel(void)

View File

@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
copy_bootdata(__va(real_mode_data));
x86_early_init_platform_quirks();
reserve_bios_regions();
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:

View File

@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
mc146818_set_time(&curr_time);
mc146818_get_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {

View File

@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
irq_stats(j)->irq_tlb_count);
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)

View File

@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
kernel_randomize_memory();
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
max_possible_pfn = max_pfn;
/*
* Define random base addresses for memory sections after max_pfn is
* defined and before each memory section base is used.
*/
kernel_randomize_memory();
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
efi_find_mirror();
}
reserve_bios_regions();
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
early_trap_pf_init();
setup_real_mode();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
*/
if (boot_cpu_data.cpuid_level >= 0)
/* A CPU has %cr4 if and only if it has CPUID. */
mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = __read_cr4();
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
}
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,

View File

@ -22,6 +22,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
/* Inform the TSC deadline clockevent devices about the recalibration */
lapic_update_tsc_freq();
out:
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;

View File

@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
*cursor &= 0xfe;
}
/*
* Similar treatment for VEX3 prefix.
* TODO: add XOP/EVEX treatment when insn decoder supports them
* Similar treatment for VEX3/EVEX prefix.
* TODO: add XOP treatment when insn decoder supports them
*/
if (insn->vex_prefix.nbytes == 3) {
if (insn->vex_prefix.nbytes >= 3) {
/*
* vex2: c5 rvvvvLpp (has no b bit)
* vex3/xop: c4/8f rxbmmmmm wvvvvLpp
* evex: 62 rxbR00mm wvvvv1pp zllBVaaa
* (evex will need setting of both b and x since
* in non-sib encoding evex.x is 4th bit of MODRM.rm)
* Setting VEX3.b (setting because it has inverted meaning):
* Setting VEX3.b (setting because it has inverted meaning).
* Setting EVEX.x since (in non-SIB encoding) EVEX.x
* is the 4th bit of MODRM.rm, and needs the same treatment.
* For VEX3-encoded insns, VEX3.x value has no effect in
* non-SIB encoding, so the change is superfluous but harmless.
*/
cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
*cursor |= 0x20;
*cursor |= 0x60;
}
/*
@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
reg = MODRM_REG(insn); /* Fetch modrm.reg */
reg2 = 0xff; /* Fetch vex.vvvv */
if (insn->vex_prefix.nbytes == 2)
reg2 = insn->vex_prefix.bytes[1];
else if (insn->vex_prefix.nbytes == 3)
if (insn->vex_prefix.nbytes)
reg2 = insn->vex_prefix.bytes[2];
/*
* TODO: add XOP, EXEV vvvv reading.
* TODO: add XOP vvvv reading.
*
* vex.vvvv field is in bits 6-3, bits are inverted.
* But in 32-bit mode, high-order bit may be ignored.

View File

@ -19,7 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
#define debug_putstr(v) early_printk(v)
#define debug_putstr(v) early_printk("%s", v)
#define has_cpuflag(f) boot_cpu_has(f)
#define get_boot_seed() kaslr_offset()
#endif

View File

@ -3,15 +3,17 @@
* included by both the compressed kernel and the regular kernel.
*/
static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
unsigned long addr, unsigned long end)
{
addr &= PMD_MASK;
for (; addr < end; addr += PMD_SIZE) {
pmd_t *pmd = pmd_page + pmd_index(addr);
if (!pmd_present(*pmd))
set_pmd(pmd, __pmd(addr | pmd_flag));
if (pmd_present(*pmd))
continue;
set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
}
}
@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
if (pud_present(*pud)) {
pmd = pmd_offset(pud, 0);
ident_pmd_init(info->pmd_flag, pmd, addr, next);
ident_pmd_init(info, pmd, addr, next);
continue;
}
pmd = (pmd_t *)info->alloc_pgt_page(info->context);
if (!pmd)
return -ENOMEM;
ident_pmd_init(info->pmd_flag, pmd, addr, next);
ident_pmd_init(info, pmd, addr, next);
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
}
@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
}
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long addr, unsigned long end)
unsigned long pstart, unsigned long pend)
{
unsigned long addr = pstart + info->offset;
unsigned long end = pend + info->offset;
unsigned long next;
int result;
int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr) + off;
pgd_t *pgd = pgd_page + pgd_index(addr);
pud_t *pud;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;

View File

@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
return __va(pfn << PAGE_SHIFT);
}
/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */
#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE)
/*
* By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
* With KASLR memory randomization enabled, we may need up to twice as
* many pages, depending on the machine's e820 memory map and the PUD
* alignment.
*/
#ifndef CONFIG_RANDOMIZE_MEMORY
#define INIT_PGD_PAGE_COUNT 6
#else
#define INIT_PGD_PAGE_COUNT 12
#endif
#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void __init early_alloc_pgt_buf(void)
{

View File

@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
* add padding if needed (especially for memory hotplug support).
*/
BUG_ON(kaslr_regions[0].base != &page_offset_base);
memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
/* Adapt physical memory region size based on available memory */

View File

@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
size_t rm_size;
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA)
@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
if (md->attribute & EFI_MEMORY_RUNTIME)
continue;
/*
* Nasty quirk: if all sub-1MB memory is used for boot
* services, we can get here without having allocated the
* real mode trampoline. It's too late to hand boot services
* memory back to the memblock allocator, so instead
* try to manually allocate the trampoline if needed.
*
* I've seen this on a Dell XPS 13 9350 with firmware
* 1.4.4 with SGX enabled booting Linux via Fedora 24's
* grub2-efi on a hard disk. (And no, I don't know why
* this happened, but Linux should still try to boot rather
* than panicking early.)
*/
rm_size = real_mode_size_needed();
if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
set_real_mode_mem(start, rm_size);
start += rm_size;
size -= rm_size;
}
free_bootmem_late(start, size);
}

View File

@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
void uv_bios_init(void)
{
uv_systab = NULL;
if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
!efi.uv_systab || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
@ -199,12 +200,14 @@ void uv_bios_init(void)
return;
}
/* Starting with UV4 the UV systab size is variable */
if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
int size = uv_systab->size;
iounmap(uv_systab);
uv_systab = ioremap(efi.uv_systab, uv_systab->size);
uv_systab = ioremap(efi.uv_systab, size);
if (!uv_systab) {
pr_err("UV: UVsystab: ioremap(%d) failed!\n",
uv_systab->size);
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return;
}
}

View File

@ -87,7 +87,7 @@ static int set_up_temporary_mappings(void)
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
.kernel_mapping = true,
.offset = __PAGE_OFFSET,
};
unsigned long mstart, mend;
pgd_t *pgd;

View File

@ -1,9 +1,11 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
#include <asm/tlbflush.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
void __init reserve_real_mode(void)
void __init set_real_mode_mem(phys_addr_t mem, size_t size)
{
phys_addr_t mem;
unsigned char *base;
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
void *base = __va(mem);
/* Has to be under 1M so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
if (!mem)
panic("Cannot allocate trampoline\n");
base = __va(mem);
memblock_reserve(mem, size);
real_mode_header = (struct real_mode_header *) base;
printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
base, (unsigned long long)mem, size);
}
void __init setup_real_mode(void)
void __init reserve_real_mode(void)
{
phys_addr_t mem;
size_t size = real_mode_size_needed();
if (!size)
return;
WARN_ON(slab_is_available());
/* Has to be under 1M so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
if (!mem) {
pr_info("No sub-1M memory is available for the trampoline\n");
return;
}
memblock_reserve(mem, size);
set_real_mode_mem(mem, size);
}
static void __init setup_real_mode(void)
{
u16 real_mode_seg;
const u32 *rel;
@ -84,7 +98,7 @@ void __init setup_real_mode(void)
trampoline_header->start = (u64) secondary_startup_64;
trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = __read_cr4();
*trampoline_cr4_features = mmu_cr4_features;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@ -100,7 +114,7 @@ void __init setup_real_mode(void)
* need to mark it executable at do_pre_smp_initcalls() at least,
* thus run it as an early_initcall().
*/
static int __init set_real_mode_permissions(void)
static void __init set_real_mode_permissions(void)
{
unsigned char *base = (unsigned char *) real_mode_header;
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
}
static int __init init_real_mode(void)
{
if (!real_mode_header)
panic("Real mode trampoline was not allocated");
setup_real_mode();
set_real_mode_permissions();
return 0;
}
early_initcall(set_real_mode_permissions);
early_initcall(init_real_mode);

Some files were not shown because too many files have changed in this diff.