[X86] Generate unaligned access for fixed slots in unaligned stack
loadRegFromStackSlot()/storeRegToStackSlot() can generate aligned access
instructions for stack slots even if the stack is unaligned, based on the
assumption that the stack can be realigned. However, this doesn't work for
fixed slots, which are used, for example, to spill XMM registers in a
non-leaf function with `__attribute__((preserve_all))`. When such code is
compiled with `-mstack-alignment=8`, the aligned accesses cause general
protection faults.

Fix this by considering stack realignment only for non-fixed slots.

Note that this changes the output of three existing tests which spill AVX
registers, since AVX requires higher alignment than the ABI provides on
stack frame entry.

Reviewed By: rnk, jyknight

Differential Revision: https://reviews.llvm.org/D73126
This commit is contained in:
parent 11ef356d9e
commit 1ac98044df
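For context, a minimal reproduction sketch of the scenario described in the commit message. It is not part of the patch; the file name, function names, and exact flags are illustrative assumptions.

// repro.cpp (hypothetical): build with something like
//   clang++ -O2 -mstack-alignment=8 -c repro.cpp
void callee();

// preserve_all makes the prologue/epilogue save and restore the XMM registers.
// Because the function is non-leaf, those saves land in fixed stack slots, and
// with only 8-byte stack alignment an aligned spill instruction (movaps-style)
// could fault at run time before this fix.
__attribute__((preserve_all)) void spill_xmm() {
  callee();
}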
@@ -3794,7 +3794,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
-  assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
          "Stack slot too small for store");
   if (RC->getID() == X86::TILERegClassID) {
     unsigned Opc = X86::TILESTORED;

@@ -3812,7 +3813,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
     bool isAligned =
         (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        RI.canRealignStack(MF);
+        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
     unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
         .addReg(SrcReg, getKillRegState(isKill));

@@ -3838,10 +3839,11 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     MO.setIsKill(true);
   } else {
     const MachineFunction &MF = *MBB.getParent();
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
     unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
     bool isAligned =
         (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
-        RI.canRealignStack(MF);
+        (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
     unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
     addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
                       FrameIdx);
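In prose, the new isAligned computation says: use an aligned spill/reload opcode when the guaranteed stack alignment already covers the spill, or when the frame can be realigned and the slot is not fixed, since a fixed object keeps a constant offset from the incoming stack pointer and cannot benefit from realignment. A condensed, self-contained sketch of that decision follows; plain booleans stand in for the LLVM types, and the helper name is made up for illustration.

// Illustrative only: mirrors the isAligned computation above without LLVM types.
// stackAlign: stack alignment guaranteed on function entry (e.g. 8 with
// -mstack-alignment=8); spillAlign: alignment the spill wants (16 for XMM,
// 32 for YMM, 64 for ZMM).
bool useAlignedSpillOpcode(unsigned stackAlign, unsigned spillAlign,
                           bool canRealignStack, bool isFixedSlot) {
  // The incoming stack alignment already satisfies the spill.
  if (stackAlign >= spillAlign)
    return true;
  // Otherwise rely on stack realignment, but only for non-fixed slots: a
  // fixed slot's address is pinned relative to the incoming stack/frame
  // pointer, so realigning %rsp cannot change its alignment.
  return canRealignStack && !isFixedSlot;
}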
@@ -513,22 +513,22 @@ entry:
 ;AVX: pushq %rdx
 ;AVX: pushq %rcx
 ;AVX: pushq %rbx
-;AVX: vmovaps %ymm15
-;AVX-NEXT: vmovaps %ymm14
-;AVX-NEXT: vmovaps %ymm13
-;AVX-NEXT: vmovaps %ymm12
-;AVX-NEXT: vmovaps %ymm11
-;AVX-NEXT: vmovaps %ymm10
-;AVX-NEXT: vmovaps %ymm9
-;AVX-NEXT: vmovaps %ymm8
-;AVX-NEXT: vmovaps %ymm7
-;AVX-NEXT: vmovaps %ymm6
-;AVX-NEXT: vmovaps %ymm5
-;AVX-NEXT: vmovaps %ymm4
-;AVX-NEXT: vmovaps %ymm3
-;AVX-NEXT: vmovaps %ymm2
-;AVX-NEXT: vmovaps %ymm1
-;AVX-NEXT: vmovaps %ymm0
+;AVX: vmovups %ymm15
+;AVX-NEXT: vmovups %ymm14
+;AVX-NEXT: vmovups %ymm13
+;AVX-NEXT: vmovups %ymm12
+;AVX-NEXT: vmovups %ymm11
+;AVX-NEXT: vmovups %ymm10
+;AVX-NEXT: vmovups %ymm9
+;AVX-NEXT: vmovups %ymm8
+;AVX-NEXT: vmovups %ymm7
+;AVX-NEXT: vmovups %ymm6
+;AVX-NEXT: vmovups %ymm5
+;AVX-NEXT: vmovups %ymm4
+;AVX-NEXT: vmovups %ymm3
+;AVX-NEXT: vmovups %ymm2
+;AVX-NEXT: vmovups %ymm1
+;AVX-NEXT: vmovups %ymm0
   call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
   ret void
 }
@@ -67,27 +67,27 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 
 ; test calling conventions - prolog and epilog
 ; WIN64-LABEL: test_prolog_epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovups {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
 ; WIN64: call
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
 
 ; X64-LABEL: test_prolog_epilog
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Spill
|
@ -190,44 +190,44 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||
; WIN64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||
; WIN64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
|
||||
; WIN64-KNL-NEXT: andq $-64, %rsp
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
|
||||
; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
|
||||
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
|
||||
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
|
||||
; WIN64-KNL-NEXT: callq func_float16
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
|
||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
|
||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
|
||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
|
||||
|
@@ -245,44 +245,44 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
 ; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; WIN64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; WIN64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
-; WIN64-SKX-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
+; WIN64-SKX-NEXT: vmovups %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
 ; WIN64-SKX-NEXT: andq $-64, %rsp
 ; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
 ; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
 ; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN64-SKX-NEXT: callq func_float16
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
-; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
+; WIN64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 8-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
 ; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
@@ -0,0 +1,15 @@
+; Make sure that when the stack may be misaligned on function entry, fixed frame
+; elements (here: XMM spills) are accessed using instructions that tolerate
+; unaligned access.
+;
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 -mattr=+sse,+sse-unaligned-mem -stack-alignment=8 --frame-pointer=all < %s | FileCheck %s
+
+define dso_local preserve_allcc void @func() #0 {
+; CHECK-LABEL: func:
+; CHECK: movups %xmm0, -{{[0-9]+}}(%rbp)
+  call void asm sideeffect "", "~{xmm0},~{dirflag},~{fpsr},~{flags}"() #1
+; CHECK: movups -{{[0-9]+}}(%rbp), %xmm0
+  ret void
+}
+
+attributes #0 = { nounwind }