forked from OSchip/llvm-project
[WinEH] Allocate space in funclets stack to save XMM CSRs
Summary: This is an alternate approach to D63396. Currently, funclets reuse the same stack slots that are used in the parent function for saving callee-saved xmm registers. If the parent function modifies a callee-saved xmm register before an exception is thrown, the catch handler will overwrite the original saved value. This patch allocates space in the funclet's stack for saving callee-saved xmm registers and uses RSP instead of RBP to access memory. Signed-off-by: Pengfei Wang <pengfei.wang@intel.com> Reviewers: rnk, RKSimon, craig.topper, annita.zhang, LuoYuanke, andrew.w.kaylor Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66596 Signed-off-by: Pengfei Wang <pengfei.wang@intel.com> llvm-svn: 370005
This commit is contained in:
parent
6a808d5a83
commit
564fb58a32
|
@ -1389,9 +1389,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||
int FI;
|
||||
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
|
||||
if (X86::FR64RegClass.contains(Reg)) {
|
||||
int Offset;
|
||||
unsigned IgnoredFrameReg;
|
||||
int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
|
||||
Offset += SEHFrameOffset;
|
||||
if (IsWin64Prologue && IsFunclet)
|
||||
Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
|
||||
else
|
||||
Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
|
||||
SEHFrameOffset;
|
||||
|
||||
HasWinCFI = true;
|
||||
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
|
||||
|
@ -1547,9 +1551,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
|
|||
|
||||
unsigned
|
||||
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
|
||||
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
// This is the size of the pushed CSRs.
|
||||
unsigned CSSize =
|
||||
MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
|
||||
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
|
||||
// This is the size of callee saved XMMs.
|
||||
const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
|
||||
unsigned XMMSize = WinEHXMMSlotInfo.size() *
|
||||
TRI->getSpillSize(X86::VR128RegClass);
|
||||
// This is the amount of stack a funclet needs to allocate.
|
||||
unsigned UsedSize;
|
||||
EHPersonality Personality =
|
||||
|
@ -1569,7 +1577,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
|
|||
unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
|
||||
// Subtract out the size of the callee saved registers. This is how much stack
|
||||
// each funclet will allocate.
|
||||
return FrameSizeMinusRBP - CSSize;
|
||||
return FrameSizeMinusRBP + XMMSize - CSSize;
|
||||
}
|
||||
|
||||
static bool isTailCallOpcode(unsigned Opc) {
|
||||
|
@ -1843,6 +1851,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
return Offset + FPDelta;
|
||||
}
|
||||
|
||||
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
|
||||
int FI, unsigned &FrameReg) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
|
||||
const auto it = WinEHXMMSlotInfo.find(FI);
|
||||
|
||||
if (it == WinEHXMMSlotInfo.end())
|
||||
return getFrameIndexReference(MF, FI, FrameReg);
|
||||
|
||||
FrameReg = TRI->getStackRegister();
|
||||
return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
|
||||
}
|
||||
|
||||
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
|
||||
int FI, unsigned &FrameReg,
|
||||
int Adjustment) const {
|
||||
|
@ -1941,6 +1963,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
|
|||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
|
||||
unsigned CalleeSavedFrameSize = 0;
|
||||
unsigned XMMCalleeSavedFrameSize = 0;
|
||||
auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
|
||||
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
|
||||
|
||||
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
||||
|
@ -2018,12 +2042,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
|
|||
unsigned Size = TRI->getSpillSize(*RC);
|
||||
unsigned Align = TRI->getSpillAlignment(*RC);
|
||||
// ensure alignment
|
||||
SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
|
||||
assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
|
||||
SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
|
||||
|
||||
// spill into slot
|
||||
SpillSlotOffset -= Size;
|
||||
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
|
||||
CSI[i - 1].setFrameIdx(SlotIndex);
|
||||
MFI.ensureMaxAlignment(Align);
|
||||
|
||||
// Save the start offset and size of XMM in stack frame for funclets.
|
||||
if (X86::VR128RegClass.contains(Reg)) {
|
||||
WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
|
||||
XMMCalleeSavedFrameSize += Size;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -99,6 +99,8 @@ public:
|
|||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
|
||||
int getWin64EHFrameIndexRef(const MachineFunction &MF,
|
||||
int FI, unsigned &SPReg) const;
|
||||
int getFrameIndexReferenceSP(const MachineFunction &MF,
|
||||
int FI, unsigned &SPReg, int Adjustment) const;
|
||||
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
|
||||
|
|
|
@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
/// is stashed.
|
||||
signed char RestoreBasePointerOffset = 0;
|
||||
|
||||
/// WinEHXMMSlotInfo - Slot information of XMM registers in the stack frame
|
||||
/// in bytes.
|
||||
DenseMap<int, unsigned> WinEHXMMSlotInfo;
|
||||
|
||||
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
|
||||
/// stack frame in bytes.
|
||||
unsigned CalleeSavedFrameSize = 0;
|
||||
|
@ -120,6 +124,10 @@ public:
|
|||
void setRestoreBasePointer(const MachineFunction *MF);
|
||||
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
|
||||
|
||||
DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; }
|
||||
const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const {
|
||||
return WinEHXMMSlotInfo; }
|
||||
|
||||
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
|
||||
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
|
||||
|
||||
|
|
|
@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool isFuncletReturnInstr(MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
case X86::CATCHRET:
|
||||
case X86::CLEANUPRET:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
llvm_unreachable("impossible");
|
||||
}
|
||||
|
||||
void
|
||||
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS) const {
|
||||
MachineInstr &MI = *II;
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
|
||||
: isFuncletReturnInstr(*MBBI);
|
||||
const X86FrameLowering *TFI = getFrameLowering(MF);
|
||||
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
|
||||
|
||||
|
@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
|
||||
"Return instruction can only reference SP relative frame objects");
|
||||
FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
|
||||
} else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
|
||||
FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
|
||||
} else {
|
||||
FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
|
||||
}
|
||||
|
|
|
@ -184,7 +184,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; WIN64-KNL-LABEL: test_prolog_epilog:
|
||||
; WIN64-KNL: # %bb.0:
|
||||
; WIN64-KNL-NEXT: pushq %rbp
|
||||
; WIN64-KNL-NEXT: subq $1328, %rsp # imm = 0x530
|
||||
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
|
||||
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
||||
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||
|
@ -232,14 +232,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
|
||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
|
||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
|
||||
; WIN64-KNL-NEXT: leaq 1200(%rbp), %rsp
|
||||
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
|
||||
; WIN64-KNL-NEXT: popq %rbp
|
||||
; WIN64-KNL-NEXT: retq
|
||||
;
|
||||
; WIN64-SKX-LABEL: test_prolog_epilog:
|
||||
; WIN64-SKX: # %bb.0:
|
||||
; WIN64-SKX-NEXT: pushq %rbp
|
||||
; WIN64-SKX-NEXT: subq $1328, %rsp # imm = 0x530
|
||||
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
|
||||
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
||||
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
|
@ -287,7 +287,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
|
||||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
|
||||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
|
||||
; WIN64-SKX-NEXT: leaq 1200(%rbp), %rsp
|
||||
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
|
||||
; WIN64-SKX-NEXT: popq %rbp
|
||||
; WIN64-SKX-NEXT: retq
|
||||
;
|
||||
|
@ -346,7 +346,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; X64-SKX: ## %bb.0:
|
||||
; X64-SKX-NEXT: pushq %rsi
|
||||
; X64-SKX-NEXT: pushq %rdi
|
||||
; X64-SKX-NEXT: subq $1192, %rsp ## imm = 0x4A8
|
||||
; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
|
||||
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||
|
@ -388,7 +388,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
|
||||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
|
||||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
|
||||
; X64-SKX-NEXT: addq $1192, %rsp ## imm = 0x4A8
|
||||
; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
|
||||
; X64-SKX-NEXT: popq %rdi
|
||||
; X64-SKX-NEXT: popq %rsi
|
||||
; X64-SKX-NEXT: retq
|
||||
|
|
|
@ -51,3 +51,18 @@ catch:
|
|||
; CHECK: popq %rbp
|
||||
; CHECK: retq
|
||||
; CHECK: .seh_handlerdata
|
||||
; CHECK: # %catch
|
||||
; CHECK: movq %rdx, 16(%rsp)
|
||||
; CHECK: pushq %rbp
|
||||
; CHECK: .seh_pushreg 5
|
||||
; CHECK: subq $48, %rsp
|
||||
; CHECK: .seh_stackalloc 48
|
||||
; CHECK: leaq 64(%rdx), %rbp
|
||||
; CHECK: movapd %xmm6, 32(%rsp)
|
||||
; CHECK: .seh_savexmm 6, 32
|
||||
; CHECK: .seh_endprologue
|
||||
; CHECK: movapd 32(%rsp), %xmm6
|
||||
; CHECK: leaq .LBB0_1(%rip), %rax
|
||||
; CHECK: addq $48, %rsp
|
||||
; CHECK: popq %rbp
|
||||
; CHECK: retq # CATCHRET
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
; RUN: llc -mtriple=x86_64-pc-windows-msvc -mattr=+avx < %s | FileCheck %s
|
||||
|
||||
; void bar(int a, int b, int c, int d, int e);
|
||||
; void baz(int x);
|
||||
;
|
||||
; void foo(int a, int b, int c, int d, int e)
|
||||
; {
|
||||
; __asm("nop" ::: "bx", "cx", "xmm5", "xmm6", "ymm7");
|
||||
; try {
|
||||
; bar(a, b, c, d, e);
|
||||
; }
|
||||
; catch (...) {
|
||||
; baz(a);
|
||||
; if (a)
|
||||
; __asm("nop" ::: "xmm8");
|
||||
; }
|
||||
; }
|
||||
|
||||
%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
|
||||
|
||||
$"??_R0H@8" = comdat any
|
||||
|
||||
@"??_7type_info@@6B@" = external constant i8*
|
||||
@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
|
||||
|
||||
declare dso_local i32 @__CxxFrameHandler3(...)
|
||||
declare dso_local void @"?bar@@YAXHHHHH@Z"(i32, i32, i32, i32, i32)
|
||||
declare dso_local void @"?baz@@YAXH@Z"(i32)
|
||||
|
||||
define dso_local void @"?foo@@YAXHHHHH@Z"(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
|
||||
entry:
|
||||
%e.addr = alloca i32, align 4
|
||||
%d.addr = alloca i32, align 4
|
||||
%c.addr = alloca i32, align 4
|
||||
%b.addr = alloca i32, align 4
|
||||
%a.addr = alloca i32, align 4
|
||||
store i32 %e, i32* %e.addr, align 4
|
||||
store i32 %d, i32* %d.addr, align 4
|
||||
store i32 %c, i32* %c.addr, align 4
|
||||
store i32 %b, i32* %b.addr, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
call void asm sideeffect "nop", "~{bx},~{cx},~{xmm5},~{xmm6},~{ymm7}"()
|
||||
%0 = load i32, i32* %e.addr, align 4
|
||||
%1 = load i32, i32* %d.addr, align 4
|
||||
%2 = load i32, i32* %c.addr, align 4
|
||||
%3 = load i32, i32* %b.addr, align 4
|
||||
%4 = load i32, i32* %a.addr, align 4
|
||||
invoke void @"?bar@@YAXHHHHH@Z"(i32 %4, i32 %3, i32 %2, i32 %1, i32 %0)
|
||||
to label %invoke.cont unwind label %catch.dispatch
|
||||
|
||||
catch.dispatch: ; preds = %entry
|
||||
%5 = catchswitch within none [label %catch] unwind to caller
|
||||
|
||||
catch: ; preds = %catch.dispatch
|
||||
%6 = catchpad within %5 [i8* null, i32 64, i8* null]
|
||||
%7 = load i32, i32* %a.addr, align 4
|
||||
call void @"?baz@@YAXH@Z"(i32 %7) [ "funclet"(token %6) ]
|
||||
%8 = load i32, i32* %a.addr, align 4
|
||||
%tobool = icmp ne i32 %8, 0
|
||||
br i1 %tobool, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %catch
|
||||
call void asm sideeffect "nop", "~{xmm8}"() [ "funclet"(token %6) ]
|
||||
br label %if.end
|
||||
|
||||
invoke.cont: ; preds = %entry
|
||||
br label %try.cont
|
||||
|
||||
if.end: ; preds = %if.then, %catch
|
||||
catchret from %6 to label %catchret.dest
|
||||
|
||||
catchret.dest: ; preds = %if.end
|
||||
br label %try.cont
|
||||
|
||||
try.cont: ; preds = %catchret.dest, %invoke.cont
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: # %catch
|
||||
; CHECK: movq %rdx, 16(%rsp)
|
||||
; CHECK: pushq %rbp
|
||||
; CHECK: .seh_pushreg 5
|
||||
; CHECK: pushq %rbx
|
||||
; CHECK: .seh_pushreg 3
|
||||
; CHECK: subq $88, %rsp
|
||||
; CHECK: .seh_stackalloc 88
|
||||
; CHECK: leaq 112(%rdx), %rbp
|
||||
; CHECK: vmovaps %xmm8, 48(%rsp)
|
||||
; CHECK: .seh_savexmm 8, 48
|
||||
; CHECK: vmovaps %xmm7, 64(%rsp)
|
||||
; CHECK: .seh_savexmm 7, 64
|
||||
; CHECK: vmovaps %xmm6, 80(%rsp)
|
||||
; CHECK: .seh_savexmm 6, 80
|
||||
; CHECK: .seh_endprologue
|
||||
; CHECK: movl -{{[0-9]+}}(%rbp), %ecx
|
||||
; CHECK: vmovaps 80(%rsp), %xmm6
|
||||
; CHECK: vmovaps 64(%rsp), %xmm7
|
||||
; CHECK: vmovaps 48(%rsp), %xmm8
|
||||
; CHECK: leaq .LBB0_1(%rip), %rax
|
||||
; CHECK: addq $88, %rsp
|
||||
; CHECK: popq %rbx
|
||||
; CHECK: popq %rbp
|
||||
; CHECK: retq # CATCHRET
|
||||
|
||||
; CHECK-LABEL: "$handlerMap$0$?foo@@YAXHHHHH@Z":
|
||||
; CHECK-NEXT: .long 64 # Adjectives
|
||||
; CHECK-NEXT: .long 0 # Type
|
||||
; CHECK-NEXT: .long 0 # CatchObjOffset
|
||||
; CHECK-NEXT: .long "?catch$2@?0??foo@@YAXHHHHH@Z@4HA"@IMGREL # Handler
|
||||
; Sum of:
|
||||
; 16 RDX store offset
|
||||
; 16 two pushes
|
||||
; 88 stack alloc
|
||||
; CHECK-NEXT: .long 120 # ParentFrameOffset
|
||||
|
|
@ -294,7 +294,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK64-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
|
||||
; CHECK64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
|
||||
; CHECK64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
|
||||
; CHECK64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||
|
@ -398,7 +398,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK64-SKX-NEXT: .cfi_offset %xmm28, -448
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm29, -384
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm30, -320
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -224
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -256
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %k0, -144
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %k1, -136
|
||||
; CHECK64-SKX-NEXT: .cfi_offset %k2, -128
|
||||
|
@ -474,7 +474,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
|
||||
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
|
||||
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
|
||||
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
|
||||
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
|
||||
|
@ -635,7 +635,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK32-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
|
||||
; CHECK32-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
|
||||
; CHECK32-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
|
||||
; CHECK32-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||
|
@ -661,7 +661,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK32-SKX-NEXT: .cfi_offset %xmm4, -384
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -160
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
|
||||
; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
|
||||
|
@ -689,7 +689,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
|
||||
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
|
||||
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
|
||||
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
|
||||
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
|
||||
|
|
Loading…
Reference in New Issue