forked from OSchip/llvm-project
[PowerPC][P10] Add Vector pair calling convention
Add the calling convention for the vector pair registers. These registers overlap with the vector registers. Part of an original patch by Lei Huang. Reviewed By: nemanjai, #powerpc. Differential Revision: https://reviews.llvm.org/D117225
This commit is contained in:
parent
5791e28f30
commit
78406ac898
|
@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
|
|||
|
||||
// Callee-saved set made of CSR_64_AllRegs_Altivec plus VSL0..VSL19.
// Selected for the AnyReg convention on VSX subtargets under the AIX ABI when
// the extended Altivec ABI is not enabled.
def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
|
||||
(sequence "VSL%u", 0, 19))>;
|
||||
|
||||
// Every vector pair register, VSRp0 through VSRp31. Used as a building block
// for the cold and all-regs VSRP sets that follow.
def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>;
|
||||
|
||||
// The vector pair registers treated as callee-saved by the standard calling
// conventions: VSRp26 through VSRp31.
// NOTE(review): frame lowering clears exactly these six from SavedRegs so the
// individual VSR subregisters are spilled instead; keep the two lists in sync.
def CSR_VSRP :
|
||||
CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>;
|
||||
|
||||
// 32-bit SVR4 callee-saved set with vector pairs: CSR_SVR432_Altivec plus the
// callee-saved VSRp registers. Selected when paired vector memops are available.
def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>;
|
||||
|
||||
// 64-bit callee-saved set with vector pairs: CSR_PPC64_Altivec plus the
// callee-saved VSRp registers. Selected when paired vector memops are available.
def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>;
|
||||
|
||||
// Same as CSR_SVR464_VSRP with X2 added; chosen when the caller of
// getCalleeSavedRegs requests that R2 be saved (SaveR2).
def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>;
|
||||
|
||||
// 32-bit cold calling convention with vector pairs: CSR_SVR32_ColdCC_Altivec
// plus every VSRp register except VSRp17.
// NOTE(review): VSRp17 is presumably left out because it overlaps v2/v3
// (see the "$v2 ... $vsrp17" kill annotations in the MMA tests) - confirm.
def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec,
|
||||
(sub CSR_ALL_VSRP, VSRp17))>;
|
||||
|
||||
// 64-bit cold calling convention with vector pairs: CSR_SVR64_ColdCC plus
// every VSRp register except VSRp17.
// NOTE(review): the 32-bit counterpart builds on an *_Altivec base set while
// this one builds on plain CSR_SVR64_ColdCC - confirm the difference is
// intentional and that the VSRp entries cover the needed vector registers.
def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
|
||||
(sub CSR_ALL_VSRP, VSRp17))>;
|
||||
|
||||
// Same as CSR_SVR64_ColdCC_VSRP with X2 added; chosen for cold calls when the
// caller of getCalleeSavedRegs requests that R2 be saved (SaveR2).
def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>;
|
||||
|
||||
// AnyReg callee-saved set with vector pairs: CSR_64_AllRegs_VSX plus all 32
// VSRp registers. Selected on VSX subtargets with paired vector memops.
def CSR_64_AllRegs_VSRP :
|
||||
CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>;
|
||||
|
|
|
@ -1974,6 +1974,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
|||
|
||||
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
|
||||
// Do not explicitly save the callee saved VSRp registers.
|
||||
// The individual VSR subregisters will be saved instead.
|
||||
SavedRegs.reset(PPC::VSRp26);
|
||||
SavedRegs.reset(PPC::VSRp27);
|
||||
SavedRegs.reset(PPC::VSRp28);
|
||||
SavedRegs.reset(PPC::VSRp29);
|
||||
SavedRegs.reset(PPC::VSRp30);
|
||||
SavedRegs.reset(PPC::VSRp31);
|
||||
|
||||
// Save and clear the LR state.
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
unsigned LR = RegInfo->getRARegister();
|
||||
|
|
|
@ -183,6 +183,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
if (!TM.isPPC64() && Subtarget.isAIXABI())
|
||||
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
|
||||
if (Subtarget.hasVSX()) {
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return CSR_64_AllRegs_VSRP_SaveList;
|
||||
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
|
||||
return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList;
|
||||
return CSR_64_AllRegs_VSX_SaveList;
|
||||
|
@ -210,6 +212,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
if (Subtarget.isAIXABI())
|
||||
report_fatal_error("Cold calling unimplemented on AIX.");
|
||||
if (TM.isPPC64()) {
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList
|
||||
: CSR_SVR64_ColdCC_VSRP_SaveList;
|
||||
if (Subtarget.hasAltivec())
|
||||
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
|
||||
: CSR_SVR64_ColdCC_Altivec_SaveList;
|
||||
|
@ -217,7 +222,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
: CSR_SVR64_ColdCC_SaveList;
|
||||
}
|
||||
// 32-bit targets.
|
||||
if (Subtarget.hasAltivec())
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return CSR_SVR32_ColdCC_VSRP_SaveList;
|
||||
else if (Subtarget.hasAltivec())
|
||||
return CSR_SVR32_ColdCC_Altivec_SaveList;
|
||||
else if (Subtarget.hasSPE())
|
||||
return CSR_SVR32_ColdCC_SPE_SaveList;
|
||||
|
@ -225,6 +232,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
}
|
||||
// Standard calling convention CSRs.
|
||||
if (TM.isPPC64()) {
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList;
|
||||
if (Subtarget.hasAltivec() &&
|
||||
(!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) {
|
||||
return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
|
||||
|
@ -239,6 +248,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
|||
: CSR_AIX32_SaveList;
|
||||
return CSR_AIX32_SaveList;
|
||||
}
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return CSR_SVR432_VSRP_SaveList;
|
||||
if (Subtarget.hasAltivec())
|
||||
return CSR_SVR432_Altivec_SaveList;
|
||||
else if (Subtarget.hasSPE())
|
||||
|
@ -252,6 +263,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
|||
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
|
||||
if (CC == CallingConv::AnyReg) {
|
||||
if (Subtarget.hasVSX()) {
|
||||
if (Subtarget.pairedVectorMemops())
|
||||
return CSR_64_AllRegs_VSRP_RegMask;
|
||||
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
|
||||
return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask;
|
||||
return CSR_64_AllRegs_VSX_RegMask;
|
||||
|
@ -275,20 +288,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
|||
}
|
||||
|
||||
if (CC == CallingConv::Cold) {
|
||||
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
|
||||
: CSR_SVR64_ColdCC_RegMask)
|
||||
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
|
||||
: (Subtarget.hasSPE()
|
||||
? CSR_SVR32_ColdCC_SPE_RegMask
|
||||
: CSR_SVR32_ColdCC_RegMask));
|
||||
if (TM.isPPC64())
|
||||
return Subtarget.pairedVectorMemops()
|
||||
? CSR_SVR64_ColdCC_VSRP_RegMask
|
||||
: (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
|
||||
: CSR_SVR64_ColdCC_RegMask);
|
||||
else
|
||||
return Subtarget.pairedVectorMemops()
|
||||
? CSR_SVR32_ColdCC_VSRP_RegMask
|
||||
: (Subtarget.hasAltivec()
|
||||
? CSR_SVR32_ColdCC_Altivec_RegMask
|
||||
: (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask
|
||||
: CSR_SVR32_ColdCC_RegMask));
|
||||
}
|
||||
|
||||
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
|
||||
: CSR_PPC64_RegMask)
|
||||
: (Subtarget.hasAltivec()
|
||||
? CSR_SVR432_Altivec_RegMask
|
||||
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
|
||||
: CSR_SVR432_RegMask));
|
||||
if (TM.isPPC64())
|
||||
return Subtarget.pairedVectorMemops()
|
||||
? CSR_SVR464_VSRP_RegMask
|
||||
: (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
|
||||
: CSR_PPC64_RegMask);
|
||||
else
|
||||
return Subtarget.pairedVectorMemops()
|
||||
? CSR_SVR432_VSRP_RegMask
|
||||
: (Subtarget.hasAltivec()
|
||||
? CSR_SVR432_Altivec_RegMask
|
||||
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
|
||||
: CSR_SVR432_RegMask));
|
||||
}
|
||||
|
||||
const uint32_t*
|
||||
|
|
|
@ -13,23 +13,29 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
|
|||
; CHECK-LABEL: intrinsics1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: mflr r0
|
||||
; CHECK-NEXT: std r0, 16(r1)
|
||||
; CHECK-NEXT: stdu r1, -176(r1)
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 176
|
||||
; CHECK-NEXT: .cfi_offset lr, 16
|
||||
; CHECK-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: std r0, 16(r1)
|
||||
; CHECK-NEXT: stdu r1, -176(r1)
|
||||
; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
|
||||
; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
|
||||
; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
|
||||
; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
|
||||
; CHECK-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
|
||||
; CHECK-NEXT: xxlor vs2, v4, v4
|
||||
; CHECK-NEXT: xxlor vs3, v5, v5
|
||||
; CHECK-NEXT: .cfi_offset v28, -80
|
||||
; CHECK-NEXT: .cfi_offset v29, -64
|
||||
; CHECK-NEXT: .cfi_offset v30, -48
|
||||
; CHECK-NEXT: .cfi_offset v31, -32
|
||||
; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
|
||||
; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
|
||||
; CHECK-NEXT: vmr v29, v3
|
||||
; CHECK-NEXT: vmr v28, v2
|
||||
; CHECK-NEXT: xxlor vs0, v28, v28
|
||||
; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
|
||||
; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
|
||||
; CHECK-NEXT: vmr v31, v5
|
||||
; CHECK-NEXT: vmr v30, v4
|
||||
; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: xxlor vs1, v29, v29
|
||||
; CHECK-NEXT: xxlor vs2, v30, v30
|
||||
; CHECK-NEXT: xxlor vs3, v31, v31
|
||||
; CHECK-NEXT: ld r30, 272(r1)
|
||||
; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
|
@ -39,17 +45,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
|
|||
; CHECK-NEXT: lxvp vsp0, 64(r1)
|
||||
; CHECK-NEXT: lxvp vsp2, 32(r1)
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload
|
||||
; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload
|
||||
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
|
||||
; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
|
||||
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r30)
|
||||
; CHECK-NEXT: stxv vs1, 32(r30)
|
||||
; CHECK-NEXT: stxv vs2, 16(r30)
|
||||
; CHECK-NEXT: stxv vs3, 0(r30)
|
||||
; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi r1, r1, 176
|
||||
; CHECK-NEXT: ld r0, 16(r1)
|
||||
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: mtlr r0
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
|
@ -61,17 +69,23 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
|
|||
; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
|
||||
; CHECK-BE-NEXT: .cfi_offset lr, 16
|
||||
; CHECK-BE-NEXT: .cfi_offset r30, -16
|
||||
; CHECK-BE-NEXT: .cfi_offset v28, -80
|
||||
; CHECK-BE-NEXT: .cfi_offset v29, -64
|
||||
; CHECK-BE-NEXT: .cfi_offset v30, -48
|
||||
; CHECK-BE-NEXT: .cfi_offset v31, -32
|
||||
; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
|
||||
; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
|
||||
; CHECK-BE-NEXT: vmr v29, v3
|
||||
; CHECK-BE-NEXT: vmr v28, v2
|
||||
; CHECK-BE-NEXT: xxlor vs0, v28, v28
|
||||
; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
|
||||
; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
|
||||
; CHECK-BE-NEXT: vmr v31, v5
|
||||
; CHECK-BE-NEXT: vmr v30, v4
|
||||
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
|
||||
; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
|
||||
; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
|
||||
; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
|
||||
; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
|
||||
; CHECK-BE-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
|
||||
; CHECK-BE-NEXT: xxlor vs2, v4, v4
|
||||
; CHECK-BE-NEXT: xxlor vs3, v5, v5
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
|
||||
; CHECK-BE-NEXT: xxlor vs1, v29, v29
|
||||
; CHECK-BE-NEXT: xxlor vs2, v30, v30
|
||||
; CHECK-BE-NEXT: xxlor vs3, v31, v31
|
||||
; CHECK-BE-NEXT: ld r30, 368(r1)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
|
||||
|
@ -83,9 +97,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
|
|||
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
|
||||
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload
|
||||
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
|
||||
; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
|
||||
; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
|
||||
; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
|
||||
; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
|
||||
; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r30)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r30)
|
||||
|
|
Loading…
Reference in New Issue