[PowerPC][P10] Add Vector pair calling convention

Add the calling convention for the vector pair (VSRp) registers.
These registers overlap with the vector registers: each VSRp register
covers a pair of adjacent VSX registers.
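
For context, each VSRp register is a 256-bit pair of adjacent VSX
registers: VSRp<N> covers VSR<2N> and VSR<2N+1>, and VSR32-VSR63 alias
the Altivec registers v0-v31. A minimal standalone sketch (illustration
only, not part of the patch) that prints this aliasing for the
callee-saved pairs added below:

    // overlap.cpp -- prints which VSRs and VRs each callee-saved VSRp
    // covers, assuming the VSRp<N> = {VSR<2N>, VSR<2N+1>} aliasing
    // described above.
    #include <cstdio>

    int main() {
      for (int N = 26; N <= 31; ++N)
        std::printf("VSRp%d = VSR%d:VSR%d = v%d:v%d\n",
                    N, 2 * N, 2 * N + 1, 2 * N - 32, 2 * N - 31);
      return 0;
    }

The output ("VSRp26 = VSR52:VSR53 = v20:v21" through "VSRp31 =
VSR62:VSR63 = v30:v31") matches the v20-v31 callee-saved range of the
existing Altivec ABI.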

Part of an original patch by: Lei Huang

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D117225
Author: Stefan Pintilie
Date: 2022-03-11 10:06:17 -06:00
Commit: 78406ac898 (parent 5791e28f30)
4 changed files with 115 additions and 43 deletions

llvm/lib/Target/PowerPC/PPCCallingConv.td

@@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
(sequence "VSL%u", 0, 19))>;
def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>;
def CSR_VSRP :
CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>;
def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>;
def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>;
def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>;
def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec,
(sub CSR_ALL_VSRP, VSRp17))>;
def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
(sub CSR_ALL_VSRP, VSRp17))>;
def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>;
def CSR_64_AllRegs_VSRP :
CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>;
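
These definitions mirror the existing vector CSR sets: CSR_VSRP
(VSRp26-VSRp31) covers exactly the callee-saved v20-v31 of the Altivec
ABI, while the ColdCC variants preserve every pair except VSRp17, which
overlaps v2:v3 and thus the vector return register v2. A small
sanity-check sketch under the aliasing assumption above (hypothetical,
not part of the patch; lowVR() is a helper invented for this sketch,
not an LLVM API):

    // csr_overlap_check.cpp -- sanity-checks the aliasing assumptions
    // behind CSR_VSRP and the VSRp17 exclusion above.
    #include <cassert>

    // For N >= 16, VSRp<N> covers v<2N-32> and v<2N-31>.
    constexpr int lowVR(int N) { return 2 * N - 32; }

    int main() {
      assert(lowVR(17) == 2);  // VSRp17 = v2:v3, the vector return pair
      assert(lowVR(26) == 20); // CSR_VSRP starts at v20, like Altivec CSRs
      return 0;
    }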

llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

@@ -1974,6 +1974,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Do not explicitly save the callee saved VSRp registers.
// The individual VSR subregisters will be saved instead.
SavedRegs.reset(PPC::VSRp26);
SavedRegs.reset(PPC::VSRp27);
SavedRegs.reset(PPC::VSRp28);
SavedRegs.reset(PPC::VSRp29);
SavedRegs.reset(PPC::VSRp30);
SavedRegs.reset(PPC::VSRp31);
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
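
Clearing the VSRp bits here keeps the prologue/epilogue spilling the
callee-saved state through the individual 128-bit VSR/Altivec
subregisters, which the existing 16-byte spill and CFI machinery
already handles, presumably so the pairs never need dedicated 32-byte
save slots. An equivalent loop formulation of the six resets
(hypothetical restructuring, not the committed code):

    // Equivalent to the six explicit calls above: clear every
    // callee-saved vector-pair register so only its VSR halves
    // are spilled.
    for (unsigned Reg : {PPC::VSRp26, PPC::VSRp27, PPC::VSRp28,
                         PPC::VSRp29, PPC::VSRp30, PPC::VSRp31})
      SavedRegs.reset(Reg);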

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

@@ -183,6 +183,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX()) {
if (Subtarget.pairedVectorMemops())
return CSR_64_AllRegs_VSRP_SaveList;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList;
return CSR_64_AllRegs_VSX_SaveList;
@@ -210,6 +212,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isAIXABI())
report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
if (Subtarget.pairedVectorMemops())
return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList
: CSR_SVR64_ColdCC_VSRP_SaveList;
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
: CSR_SVR64_ColdCC_Altivec_SaveList;
@@ -217,7 +222,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_SVR64_ColdCC_SaveList;
}
// 32-bit targets.
if (Subtarget.hasAltivec())
if (Subtarget.pairedVectorMemops())
return CSR_SVR32_ColdCC_VSRP_SaveList;
else if (Subtarget.hasAltivec())
return CSR_SVR32_ColdCC_Altivec_SaveList;
else if (Subtarget.hasSPE())
return CSR_SVR32_ColdCC_SPE_SaveList;
@@ -225,6 +232,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
// Standard calling convention CSRs.
if (TM.isPPC64()) {
if (Subtarget.pairedVectorMemops())
return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList;
if (Subtarget.hasAltivec() &&
(!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) {
return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
@@ -239,6 +248,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_AIX32_SaveList;
return CSR_AIX32_SaveList;
}
if (Subtarget.pairedVectorMemops())
return CSR_SVR432_VSRP_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -252,6 +263,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX()) {
if (Subtarget.pairedVectorMemops())
return CSR_64_AllRegs_VSRP_RegMask;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask;
return CSR_64_AllRegs_VSX_RegMask;
@@ -275,20 +288,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
: (Subtarget.hasSPE()
? CSR_SVR32_ColdCC_SPE_RegMask
: CSR_SVR32_ColdCC_RegMask));
if (TM.isPPC64())
return Subtarget.pairedVectorMemops()
? CSR_SVR64_ColdCC_VSRP_RegMask
: (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask);
else
return Subtarget.pairedVectorMemops()
? CSR_SVR32_ColdCC_VSRP_RegMask
: (Subtarget.hasAltivec()
? CSR_SVR32_ColdCC_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask
: CSR_SVR32_ColdCC_RegMask));
}
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
: CSR_PPC64_RegMask)
: (Subtarget.hasAltivec()
? CSR_SVR432_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
: CSR_SVR432_RegMask));
if (TM.isPPC64())
return Subtarget.pairedVectorMemops()
? CSR_SVR464_VSRP_RegMask
: (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
: CSR_PPC64_RegMask);
else
return Subtarget.pairedVectorMemops()
? CSR_SVR432_VSRP_RegMask
: (Subtarget.hasAltivec()
? CSR_SVR432_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
: CSR_SVR432_RegMask));
}
const uint32_t*
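
Both the save-list and register-mask queries follow the same
precedence: the vector-pair (VSRP) variants whenever paired vector
memops are available (Power10, which implies VSX and Altivec), then
Altivec, then SPE on 32-bit targets, then the base convention. A
condensed sketch of that dispatch (hypothetical helper for
illustration, not LLVM API):

    // csr_precedence.cpp -- distills the feature precedence used by
    // getCalleeSavedRegs and getCallPreservedMask above.
    enum class VecCSR { VSRP, Altivec, SPE, Base };

    VecCSR pickVecCSR(bool PairedVectorMemops, bool HasAltivec,
                      bool HasSPE) {
      if (PairedVectorMemops)
        return VecCSR::VSRP;    // Power10 vector-pair lists take priority
      if (HasAltivec)
        return VecCSR::Altivec;
      if (HasSPE)
        return VecCSR::SPE;     // SPE exists only on 32-bit targets
      return VecCSR::Base;
    }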

llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

@@ -13,23 +13,29 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-LABEL: intrinsics1:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -176(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 176
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -176(r1)
; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxlor vs2, v4, v4
; CHECK-NEXT: xxlor vs3, v5, v5
; CHECK-NEXT: .cfi_offset v28, -80
; CHECK-NEXT: .cfi_offset v29, -64
; CHECK-NEXT: .cfi_offset v30, -48
; CHECK-NEXT: .cfi_offset v31, -32
; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v29, v3
; CHECK-NEXT: vmr v28, v2
; CHECK-NEXT: xxlor vs0, v28, v28
; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: vmr v30, v4
; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
; CHECK-NEXT: xxlor vs1, v29, v29
; CHECK-NEXT: xxlor vs2, v30, v30
; CHECK-NEXT: xxlor vs3, v31, v31
; CHECK-NEXT: ld r30, 272(r1)
; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
@@ -39,17 +45,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: lxvp vsp0, 64(r1)
; CHECK-NEXT: lxvp vsp2, 32(r1)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload
; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r30)
; CHECK-NEXT: stxv vs1, 32(r30)
; CHECK-NEXT: stxv vs2, 16(r30)
; CHECK-NEXT: stxv vs3, 0(r30)
; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 176
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
@@ -61,17 +69,23 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
; CHECK-BE-NEXT: .cfi_offset lr, 16
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: .cfi_offset v28, -80
; CHECK-BE-NEXT: .cfi_offset v29, -64
; CHECK-BE-NEXT: .cfi_offset v30, -48
; CHECK-BE-NEXT: .cfi_offset v31, -32
; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: vmr v29, v3
; CHECK-BE-NEXT: vmr v28, v2
; CHECK-BE-NEXT: xxlor vs0, v28, v28
; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: vmr v31, v5
; CHECK-BE-NEXT: vmr v30, v4
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxlor vs2, v4, v4
; CHECK-BE-NEXT: xxlor vs3, v5, v5
; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxlor vs1, v29, v29
; CHECK-BE-NEXT: xxlor vs2, v30, v30
; CHECK-BE-NEXT: xxlor vs3, v31, v31
; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
@@ -83,9 +97,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r30)
; CHECK-BE-NEXT: stxv vs0, 0(r30)