forked from OSchip/llvm-project
[PowerPC] Implement more fusion types for Power10
This implements the rest of the Power10 instruction fusion pairs described in the user manual, including 'wide immediate', 'load compare', 'zero move' and 'SHA3 assist'. Only 'SHA3 assist' is enabled by default. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D112912
This commit is contained in:
parent
8ea3e70fb0
commit
59f4b3d308
|
@ -203,6 +203,22 @@ def FeatureLogicalFusion :
|
|||
SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
|
||||
"Target supports Logical Operations fusion",
|
||||
[FeatureFusion]>;
|
||||
// SHA3 assist fusion. Exposed under the feature string "fuse-sha3", sets the
// subtarget field HasSha3Fusion to "true", and lists FeatureFusion as its
// implied feature.
def FeatureSha3Fusion :
|
||||
SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true",
|
||||
"Target supports SHA3 assist fusion",
|
||||
[FeatureFusion]>;
|
||||
// Comparison-operation fusion. Exposed under the feature string "fuse-cmp",
// sets the subtarget field HasCompareFusion to "true", and lists
// FeatureFusion as its implied feature.
def FeatureCompareFusion:
|
||||
SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true",
|
||||
"Target supports Comparison Operations fusion",
|
||||
[FeatureFusion]>;
|
||||
// Wide-immediate fusion. Exposed under the feature string "fuse-wideimm",
// sets the subtarget field HasWideImmFusion to "true", and lists
// FeatureFusion as its implied feature.
def FeatureWideImmFusion:
|
||||
SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true",
|
||||
"Target supports Wide-Immediate fusion",
|
||||
[FeatureFusion]>;
|
||||
// Zero-move fusion (a move to SPR followed by a branch). Exposed under the
// feature string "fuse-zeromove", sets the subtarget field HasZeroMoveFusion
// to "true", and lists FeatureFusion as its implied feature.
def FeatureZeroMoveFusion:
|
||||
SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
|
||||
"Target supports move to SPR with branch fusion",
|
||||
[FeatureFusion]>;
|
||||
def FeatureUnalignedFloats :
|
||||
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
|
||||
"true", "CPU does not trap on unaligned FP access">;
|
||||
|
@ -393,7 +409,7 @@ def ProcessorFeatures {
|
|||
// still exist with the exception of those we know are Power9 specific.
|
||||
list<SubtargetFeature> FusionFeatures = [
|
||||
FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
|
||||
FeatureLogicalFusion, FeatureArithAddFusion
|
||||
FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion,
|
||||
];
|
||||
list<SubtargetFeature> P10AdditionalFeatures =
|
||||
!listconcat(FusionFeatures, [
|
||||
|
|
|
@ -149,6 +149,79 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
|
|||
case FusionFeature::FK_SldiAdd:
|
||||
return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
|
||||
(matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
|
||||
|
||||
// rldicl rx, ra, 1, 0 - xor
|
||||
case FusionFeature::FK_RotateLeftXor:
|
||||
return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
|
||||
|
||||
// rldicr rx, ra, 1, 63 - xor
|
||||
case FusionFeature::FK_RotateRightXor:
|
||||
return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
|
||||
|
||||
// The compare's 'L' field is not a machine operand here; it is implied by the opcode (CMPW* vs. CMPD*).
|
||||
|
||||
// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
|
||||
// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
|
||||
case FusionFeature::FK_LoadCmp1:
|
||||
// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
|
||||
// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
|
||||
case FusionFeature::FK_LoadCmp2: {
|
||||
const MachineOperand &BT = SecondMI.getOperand(0);
|
||||
if (!BT.isReg() ||
|
||||
(!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
|
||||
return false;
|
||||
if (SecondMI.getOpcode() == PPC::CMPDI &&
|
||||
matchingImmOps(SecondMI, 2, -1, 16))
|
||||
return true;
|
||||
return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
|
||||
}
|
||||
|
||||
// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
|
||||
case FusionFeature::FK_LoadCmp3: {
|
||||
const MachineOperand &BT = SecondMI.getOperand(0);
|
||||
if (!BT.isReg() ||
|
||||
(!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
|
||||
return false;
|
||||
return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
|
||||
matchingImmOps(SecondMI, 2, -1, 16);
|
||||
}
|
||||
|
||||
// mtctr - { bcctr,bcctrl }
|
||||
case FusionFeature::FK_ZeroMoveCTR:
|
||||
// ( mtctr rx ) is alias of ( mtspr 9, rx )
|
||||
return (FirstMI.getOpcode() != PPC::MTSPR &&
|
||||
FirstMI.getOpcode() != PPC::MTSPR8) ||
|
||||
matchingImmOps(FirstMI, 0, 9);
|
||||
|
||||
// mtlr - { bclr,bclrl }
|
||||
case FusionFeature::FK_ZeroMoveLR:
|
||||
// ( mtlr rx ) is alias of ( mtspr 8, rx )
|
||||
return (FirstMI.getOpcode() != PPC::MTSPR &&
|
||||
FirstMI.getOpcode() != PPC::MTSPR8) ||
|
||||
matchingImmOps(FirstMI, 0, 8);
|
||||
|
||||
// addis rx,ra,si - addi rt,rx,SI, SI >= 0
|
||||
case FusionFeature::FK_AddisAddi: {
|
||||
const MachineOperand &RA = FirstMI.getOperand(1);
|
||||
const MachineOperand &SI = SecondMI.getOperand(2);
|
||||
if (!SI.isImm() || !RA.isReg())
|
||||
return false;
|
||||
if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
|
||||
return false;
|
||||
return SignExtend64(SI.getImm(), 16) >= 0;
|
||||
}
|
||||
|
||||
// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
|
||||
case FusionFeature::FK_AddiAddis: {
|
||||
const MachineOperand &RA = FirstMI.getOperand(1);
|
||||
const MachineOperand &SI = FirstMI.getOperand(2);
|
||||
if (!SI.isImm() || !RA.isReg())
|
||||
return false;
|
||||
if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
|
||||
return false;
|
||||
int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
|
||||
return ExtendedSI >= 2;
|
||||
}
|
||||
}
|
||||
|
||||
llvm_unreachable("All the cases should have been handled");
|
||||
|
|
|
@ -78,5 +78,80 @@ FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
|
|||
FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
|
||||
FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
|
||||
|
||||
// rldicl rx, ra, 1, 0 - xor
|
||||
// SHA3-assist pair, gated on hasSha3Fusion(). The opcode sets alone are not
// sufficient: checkOpConstraints (FK_RotateLeftXor) additionally requires
// operand 2 == 1 and operand 3 == 0 on the first instruction.
FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1,
|
||||
FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64),
|
||||
FUSION_OP_SET(XOR, XOR8))
|
||||
|
||||
// rldicr rx, ra, 1, 63 - xor
|
||||
// SHA3-assist pair, gated on hasSha3Fusion(). checkOpConstraints
// (FK_RotateRightXor) additionally requires operand 2 == 1 and
// operand 3 == 63 on the first instruction.
// NOTE(review): rldicr with SH=1, ME=63 also rotates *left* by one bit, so
// the "Right" in this name seems to echo the mnemonic rather than the rotate
// direction -- confirm before relying on the name.
FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1,
|
||||
FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8))
|
||||
|
||||
// There are two special cases in the 'load-compare' series, so we have to split
|
||||
// them into several pattern groups to fit into the current framework. This can
|
||||
// be made clearer once we switch to a more expressive approach.
|
||||
|
||||
// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
|
||||
// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
|
||||
// Byte/halfword/word lbz/lhz/lwz-family loads (including their TLS variants)
// followed by a compare-immediate; gated on hasCompareFusion().
// checkOpConstraints (FK_LoadCmp1) further restricts the pair: the compare's
// operand 0 must be a virtual register or CR0, and its immediate must be 0 or
// 1 (-1 is accepted only when the compare is CMPDI).
FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1,
|
||||
FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_,
|
||||
LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS,
|
||||
LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8,
|
||||
LWZXTLS, LWZXTLS_, LWZXTLS_32),
|
||||
FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI))
|
||||
|
||||
// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
|
||||
// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
|
||||
// Doubleword loads (including their TLS variants) followed by a doubleword
// compare-immediate; gated on hasCompareFusion(). FK_LoadCmp2 shares the
// FK_LoadCmp1 checks in checkOpConstraints: compare operand 0 must be a
// virtual register or CR0, immediate 0 or 1 (-1 only for CMPDI).
FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1,
|
||||
FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_),
|
||||
FUSION_OP_SET(CMPDI, CMPLDI))
|
||||
|
||||
// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
|
||||
// Sign-extending lha/lwa-family loads followed by the *signed*
// compare-immediate (cmpi), gated on hasCompareFusion(). cmpi is modelled as
// CMPWI (L=0) and CMPDI (L=1). The logical compares CMPLWI/CMPLDI are cmpli,
// which this pattern does not cover; listing them also contradicted the -1
// immediate that checkOpConstraints accepts for FK_LoadCmp3.
FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1,
|
||||
FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX,
|
||||
LWAX_32),
|
||||
FUSION_OP_SET(CMPDI, CMPWI))
|
||||
|
||||
// ori - oris
|
||||
// Logical wide-immediate pairs (this entry and the four that follow): each
// joins the low-half and high-half immediate forms of the same OR/XOR
// operation (or lis + ori), in either order, and is gated on
// hasWideImmFusion(). No FK_* case for these appears in the visible part of
// checkOpConstraints.
FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8),
|
||||
FUSION_OP_SET(ORIS, ORIS8))
|
||||
|
||||
// lis - ori
|
||||
FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8),
|
||||
FUSION_OP_SET(ORI, ORI8))
|
||||
|
||||
// oris - ori
|
||||
FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8),
|
||||
FUSION_OP_SET(ORI, ORI8))
|
||||
|
||||
// xori - xoris
|
||||
FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8),
|
||||
FUSION_OP_SET(XORIS, XORIS8))
|
||||
|
||||
// xoris - xori
|
||||
FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
|
||||
FUSION_OP_SET(XORI, XORI8))
|
||||
|
||||
// addis rx,ra,si - addi rt,rx,SI, SI >= 0
|
||||
// Wide-immediate add pair, gated on hasWideImmFusion(). checkOpConstraints
// (FK_AddisAddi) rejects the pair unless operand 1 of the addis is a register
// other than ZERO/ZERO8 and operand 2 of the addi is an immediate whose
// 16-bit sign-extended value is >= 0.
FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
|
||||
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
|
||||
FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
|
||||
|
||||
// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
|
||||
// Wide-immediate add pair, gated on hasWideImmFusion(). checkOpConstraints
// (FK_AddiAddis) rejects the pair unless operand 1 of the addi is a register
// other than ZERO/ZERO8 and the checked immediate sign-extends (16-bit) to a
// value >= 2.
// NOTE(review): that check reads operand 2 of the *first* instruction (the
// addi), whereas the pattern comment attaches ">= 2" to the addis's SI --
// confirm which operand the constraint is meant to apply to.
FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
|
||||
FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
|
||||
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
|
||||
|
||||
// mtctr - { bcctr,bcctrl }
|
||||
// Move-to-CTR followed by a branch-to-CTR, gated on hasZeroMoveFusion().
// The generic MTSPR/MTSPR8 opcodes are listed too; for those,
// checkOpConstraints (FK_ZeroMoveCTR) additionally requires
// matchingImmOps(FirstMI, 0, 9), i.e. the "mtspr 9" form noted there.
FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
|
||||
FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR),
|
||||
FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn,
|
||||
BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL))
|
||||
|
||||
// mtlr - { bclr,bclrl }
|
||||
// Move-to-LR followed by a branch-to-LR, gated on hasZeroMoveFusion().
// The generic MTSPR/MTSPR8 opcodes are listed too; for those,
// checkOpConstraints (FK_ZeroMoveLR) additionally requires
// matchingImmOps(FirstMI, 0, 8), i.e. the "mtspr 8" form noted there.
FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
|
||||
FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
|
||||
FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
|
||||
|
||||
#undef FUSION_FEATURE
|
||||
#undef FUSION_OP_SET
|
||||
|
|
|
@ -131,6 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
|
|||
HasAddLogicalFusion = false;
|
||||
HasLogicalAddFusion = false;
|
||||
HasLogicalFusion = false;
|
||||
HasSha3Fusion = false;
|
||||
HasCompareFusion = false;
|
||||
HasWideImmFusion = false;
|
||||
HasZeroMoveFusion = false;
|
||||
IsISA2_06 = false;
|
||||
IsISA2_07 = false;
|
||||
IsISA3_0 = false;
|
||||
|
|
|
@ -151,6 +151,10 @@ protected:
|
|||
bool HasAddLogicalFusion;
|
||||
bool HasLogicalAddFusion;
|
||||
bool HasLogicalFusion;
|
||||
bool HasSha3Fusion;
|
||||
bool HasCompareFusion;
|
||||
bool HasWideImmFusion;
|
||||
bool HasZeroMoveFusion;
|
||||
bool IsISA2_06;
|
||||
bool IsISA2_07;
|
||||
bool IsISA3_0;
|
||||
|
@ -340,6 +344,10 @@ public:
|
|||
bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
|
||||
bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
|
||||
bool hasLogicalFusion() const { return HasLogicalFusion; }
|
||||
/// Returns HasCompareFusion, the flag set by the "fuse-cmp" subtarget feature.
bool hasCompareFusion() const { return HasCompareFusion; }
|
||||
/// Returns HasWideImmFusion, the flag set by the "fuse-wideimm" subtarget feature.
bool hasWideImmFusion() const { return HasWideImmFusion; }
|
||||
/// Returns HasSha3Fusion, the flag set by the "fuse-sha3" subtarget feature.
bool hasSha3Fusion() const { return HasSha3Fusion; }
|
||||
/// Returns HasZeroMoveFusion, the flag set by the "fuse-zeromove" subtarget feature.
bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
|
||||
bool needsSwapsForVSXMemOps() const {
|
||||
return hasVSX() && isLittleEndian() && !hasP9Vector();
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# REQUIRES: asserts
|
||||
# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -x=mir < %s \
|
||||
# RUN: -debug-only=machine-scheduler -start-before=postmisched 2>&1 \
|
||||
# RUN: -mattr=+fuse-zeromove,+fuse-cmp,+fuse-wideimm \
|
||||
# RUN: | FileCheck %s
|
||||
|
||||
# CHECK: add_mulld:%bb.0
|
||||
|
@ -93,3 +94,55 @@ body: |
|
|||
renamable $x3 = ADD8 killed renamable $x4, $x5
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
# CHECK: rldicl_xor:%bb.0
|
||||
# CHECK: Macro fuse: SU(0) - SU(1) / RLDICL - XOR8
|
||||
---
|
||||
name: rldicl_xor
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x3, $x4, $x5
|
||||
renamable $x4 = RLDICL $x3, 1, 0
|
||||
renamable $x3 = XOR8 killed renamable $x4, $x5
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
# CHECK: rldicr_xor:%bb.0
|
||||
# CHECK: Macro fuse: SU(0) - SU(1) / RLDICR - XOR8
|
||||
---
|
||||
name: rldicr_xor
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x3, $x4, $x5
|
||||
renamable $x4 = RLDICR $x3, 1, 63
|
||||
renamable $x3 = XOR8 killed renamable $x4, $x5
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
# CHECK: ori_oris:%bb.0
|
||||
# CHECK: Macro fuse: SU(0) - SU(1) / ORI8 - ORIS8
|
||||
---
|
||||
name: ori_oris
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x3, $x4
|
||||
renamable $x4 = ORI8 $x3, 63
|
||||
renamable $x3 = ORIS8 killed renamable $x4, 20
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
# CHECK: load_cmp:%bb.0
|
||||
# CHECK: Macro fuse: SU(0) - SU(1) / LD - CMPDI
|
||||
---
|
||||
name: load_cmp
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x3, $x4, $x5
|
||||
renamable $x3 = LD 0, killed renamable $x3
|
||||
renamable $cr0 = CMPDI killed renamable $x3, 0
|
||||
renamable $x3 = ISEL8 killed renamable $x5, killed renamable $x4, renamable $cr0lt, implicit killed $cr0
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
|
Loading…
Reference in New Issue