[anyregcc] Fix callee-save mask for anyregcc

Use separate callee-save masks for XMM and YMM registers for anyregcc on X86,
and select the appropriate mask based on whether the target CPU supports AVX.

llvm-svn: 198985
This commit is contained in:
Juergen Ributzka 2014-01-11 01:00:27 +00:00
parent fd6e4b91b7
commit 976d94b834
4 changed files with 141 additions and 25 deletions

View File

@ -729,6 +729,8 @@ function. The operand fields are:
* ``ccc``: code 0
* ``fastcc``: code 8
* ``coldcc``: code 9
* ``webkit_jscc``: code 12
* ``anyregcc``: code 13
* ``x86_stdcallcc``: code 64
* ``x86_fastcallcc``: code 65
* ``arm_apcscc``: code 66

View File

@ -620,14 +620,15 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
R11, R12, R13, R14, R15, RBP,
(sequence "XMM%u", 0, 15))>;
def CSR_AllRegs_64 : CalleeSavedRegs<(add CSR_MostRegs_64, RAX, RSP,
(sequence "XMM%u", 16, 31),
(sequence "YMM%u", 0, 31),
(sequence "ZMM%u", 0, 31))>;
def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP,
(sequence "XMM%u", 16, 31))>;
def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP,
(sequence "YMM%u", 0, 31)),
(sequence "XMM%u", 0, 15))>;
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,

View File

@ -234,17 +234,18 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
switch (MF->getFunction()->getCallingConv()) {
case CallingConv::GHC:
case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
case CallingConv::AnyReg:
return CSR_AllRegs_64_SaveList;
if (HasAVX)
return CSR_64_AllRegs_AVX_SaveList;
return CSR_64_AllRegs_SaveList;
case CallingConv::Intel_OCL_BI: {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
if (HasAVX512 && Is64Bit)
@ -257,12 +258,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_Intel_OCL_BI_SaveList;
break;
}
case CallingConv::Cold:
if (Is64Bit)
return CSR_MostRegs_64_SaveList;
return CSR_64_MostRegs_SaveList;
break;
default:
break;
}
@ -285,7 +284,15 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
if (CC == CallingConv::Intel_OCL_BI) {
switch (CC) {
case CallingConv::GHC:
case CallingConv::HiPE:
return CSR_NoRegs_RegMask;
case CallingConv::AnyReg:
if (HasAVX)
return CSR_64_AllRegs_AVX_RegMask;
return CSR_64_AllRegs_RegMask;
case CallingConv::Intel_OCL_BI: {
if (IsWin64 && HasAVX512)
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
if (Is64Bit && HasAVX512)
@ -297,17 +304,20 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AllRegs_64_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
if (CC == CallingConv::Cold)
return CSR_MostRegs_64_RegMask;
if (IsWin64)
return CSR_Win64_RegMask;
return CSR_64_RegMask;
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
break;
default:
break;
}
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_RegMask;
return CSR_64_RegMask;
}
return CSR_32_RegMask;
}
const uint32_t*

View File

@ -1,4 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck --check-prefix=SSE %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
; Stackmap Header: no constants - 6 callsites
; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
@ -336,5 +339,105 @@ entry:
ret i64 %result
}
; Make sure all regs are spilled
;
; @anyregcc1 is defined with the anyregcc calling convention and its body is a
; single inline asm statement that clobbers rax, rbx, rcx, rdx, rsi, rdi,
; r8-r15, rbp and xmm0-xmm15. The directives below expect the function to push
; each of those general-purpose registers and then store all sixteen vector
; registers, in descending order from 15 down to 0:
;   - under the SSE prefix (llc invocation with -mcpu=corei7): movaps of
;     xmm15 .. xmm0
;   - under the AVX prefix (llc invocation with -mcpu=corei7-avx): vmovups of
;     ymm15 .. ymm0
; The vector stores after the first one use the -NEXT form, so they must be
; emitted on consecutive output lines.
define anyregcc void @anyregcc1() {
entry:
;SSE-LABEL: anyregcc1
;SSE: pushq %rax
;SSE: pushq %rbp
;SSE: pushq %r15
;SSE: pushq %r14
;SSE: pushq %r13
;SSE: pushq %r12
;SSE: pushq %r11
;SSE: pushq %r10
;SSE: pushq %r9
;SSE: pushq %r8
;SSE: pushq %rdi
;SSE: pushq %rsi
;SSE: pushq %rdx
;SSE: pushq %rcx
;SSE: pushq %rbx
;SSE: movaps %xmm15
;SSE-NEXT: movaps %xmm14
;SSE-NEXT: movaps %xmm13
;SSE-NEXT: movaps %xmm12
;SSE-NEXT: movaps %xmm11
;SSE-NEXT: movaps %xmm10
;SSE-NEXT: movaps %xmm9
;SSE-NEXT: movaps %xmm8
;SSE-NEXT: movaps %xmm7
;SSE-NEXT: movaps %xmm6
;SSE-NEXT: movaps %xmm5
;SSE-NEXT: movaps %xmm4
;SSE-NEXT: movaps %xmm3
;SSE-NEXT: movaps %xmm2
;SSE-NEXT: movaps %xmm1
;SSE-NEXT: movaps %xmm0
;AVX-LABEL:anyregcc1
;AVX: pushq %rax
;AVX: pushq %rbp
;AVX: pushq %r15
;AVX: pushq %r14
;AVX: pushq %r13
;AVX: pushq %r12
;AVX: pushq %r11
;AVX: pushq %r10
;AVX: pushq %r9
;AVX: pushq %r8
;AVX: pushq %rdi
;AVX: pushq %rsi
;AVX: pushq %rdx
;AVX: pushq %rcx
;AVX: pushq %rbx
;AVX: vmovups %ymm15
;AVX-NEXT: vmovups %ymm14
;AVX-NEXT: vmovups %ymm13
;AVX-NEXT: vmovups %ymm12
;AVX-NEXT: vmovups %ymm11
;AVX-NEXT: vmovups %ymm10
;AVX-NEXT: vmovups %ymm9
;AVX-NEXT: vmovups %ymm8
;AVX-NEXT: vmovups %ymm7
;AVX-NEXT: vmovups %ymm6
;AVX-NEXT: vmovups %ymm5
;AVX-NEXT: vmovups %ymm4
;AVX-NEXT: vmovups %ymm3
;AVX-NEXT: vmovups %ymm2
;AVX-NEXT: vmovups %ymm1
;AVX-NEXT: vmovups %ymm0
; Empty asm body; only the clobber list matters. Note the clobbers name the
; xmm registers even though the AVX run expects full ymm stores.
call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
ret void
}
; Make sure we don't spill any XMMs/YMMs
;
; @anyregcc2 is the caller-side counterpart: it uses the default calling
; convention and makes a call to an anyregcc function. Sixteen <2 x double>
; values are pinned to xmm0-xmm15 before the call and consumed from the same
; registers after it, so all sixteen are live across the call. The negative
; directives below forbid any "movaps %xmm" (SSE prefix) or "vmovups %ymm"
; (AVX prefix) after the anyregcc2 label.
; NOTE(review): presumably this passes because the anyregcc call-preserved
; mask marks these vector registers as preserved - confirm against
; X86RegisterInfo::getCallPreservedMask.
declare anyregcc void @foo()
define void @anyregcc2() {
entry:
;SSE-LABEL: anyregcc2
;SSE-NOT: movaps %xmm
;AVX-LABEL: anyregcc2
;AVX-NOT: vmovups %ymm
; Produce one value in each of xmm0 .. xmm15 via an output register constraint.
%a0 = call <2 x double> asm sideeffect "", "={xmm0}"() nounwind
%a1 = call <2 x double> asm sideeffect "", "={xmm1}"() nounwind
%a2 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind
%a3 = call <2 x double> asm sideeffect "", "={xmm3}"() nounwind
%a4 = call <2 x double> asm sideeffect "", "={xmm4}"() nounwind
%a5 = call <2 x double> asm sideeffect "", "={xmm5}"() nounwind
%a6 = call <2 x double> asm sideeffect "", "={xmm6}"() nounwind
%a7 = call <2 x double> asm sideeffect "", "={xmm7}"() nounwind
%a8 = call <2 x double> asm sideeffect "", "={xmm8}"() nounwind
%a9 = call <2 x double> asm sideeffect "", "={xmm9}"() nounwind
%a10 = call <2 x double> asm sideeffect "", "={xmm10}"() nounwind
%a11 = call <2 x double> asm sideeffect "", "={xmm11}"() nounwind
%a12 = call <2 x double> asm sideeffect "", "={xmm12}"() nounwind
%a13 = call <2 x double> asm sideeffect "", "={xmm13}"() nounwind
%a14 = call <2 x double> asm sideeffect "", "={xmm14}"() nounwind
%a15 = call <2 x double> asm sideeffect "", "={xmm15}"() nounwind
; The call under test: all sixteen values above are still needed afterwards.
call anyregcc void @foo()
; Consume every value from the same register it was produced in.
call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},{xmm8},{xmm9},{xmm10},{xmm11},{xmm12},{xmm13},{xmm14},{xmm15}"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3, <2 x double> %a4, <2 x double> %a5, <2 x double> %a6, <2 x double> %a7, <2 x double> %a8, <2 x double> %a9, <2 x double> %a10, <2 x double> %a11, <2 x double> %a12, <2 x double> %a13, <2 x double> %a14, <2 x double> %a15)
ret void
}
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)