[TLI][PowerPC] Introduce TLI query to check if MULH is cheaper than MUL + SHIFT

This patch introduces a TargetLowering query, isMulhCheaperThanMulShift.

Currently, DAGCombine transforms mulhs/mulhu into a wider multiply
followed by a shift whenever the wider multiply is legal.

This TLI function is implemented for 64-bit PowerPC, where a multiply-high is
preferable to a multiply followed by a shift for words and doublewords. Keeping
the multiply-high node can also enable further transformations.

Differential Revision: https://reviews.llvm.org/D78271
This commit is contained in:
Amy Kwan 2020-05-23 16:46:33 -05:00
parent 6e48a6e407
commit b631f86ac5
9 changed files with 729 additions and 957 deletions

View File

@ -1652,6 +1652,10 @@ public:
virtual bool isJumpTableRelative() const;
/// Target hook: report whether a mulhs/mulhu node of the given type costs
/// less than the equivalent wider multiply followed by a shift. DAGCombiner
/// consults this before expanding mulhs/mulhu into that wider form.
/// The default implementation answers false.
virtual bool isMulhCheaperThanMulShift(EVT Type) const {
  return false;
}
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
unsigned getStackPointerRegisterToSaveRestore() const {

View File

@ -4118,7 +4118,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@ -4174,7 +4174,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);

View File

@ -1401,6 +1401,16 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
/// Report whether a multiply-high (mulhs/mulhu) of the given type is cheaper
/// than a multiply followed by a shift.
/// On 64-bit subtargets this holds whenever ISD::MULHS or ISD::MULHU is legal
/// for the type; every other case defers to the generic TargetLowering answer.
bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
  // Without a 64-bit subtarget there is no PowerPC-specific preference.
  if (!Subtarget.isPPC64())
    return TargetLowering::isMulhCheaperThanMulShift(Type);

  // A legal signed or unsigned multiply-high is enough to prefer mulh.
  const bool HasLegalMulh = isOperationLegal(ISD::MULHS, Type) ||
                            isOperationLegal(ISD::MULHU, Type);
  return HasLegalMulh || TargetLowering::isMulhCheaperThanMulShift(Type);
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;

View File

@ -950,6 +950,11 @@ namespace llvm {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a
/// specific type is cheaper than a multiply followed by a shift.
/// The PowerPC implementation returns true on 64-bit subtargets when
/// ISD::MULHS or ISD::MULHU is legal for \p Type (words and doublewords);
/// otherwise it falls back to the TargetLowering default.
bool isMulhCheaperThanMulShift(EVT Type) const override;
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;

View File

@ -509,10 +509,9 @@ define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) {
; CHECK-NEXT: bdz .LBB6_9
; CHECK-NEXT: .LBB6_4: #
; CHECK-NEXT: lbzu r0, 1(r5)
; CHECK-NEXT: clrldi r27, r0, 32
; CHECK-NEXT: mulld r27, r27, r4
; CHECK-NEXT: rldicl r27, r27, 31, 33
; CHECK-NEXT: slwi r26, r27, 1
; CHECK-NEXT: mulhwu r27, r0, r4
; CHECK-NEXT: rlwinm r26, r27, 0, 0, 30
; CHECK-NEXT: srwi r27, r27, 1
; CHECK-NEXT: add r27, r27, r26
; CHECK-NEXT: subf r0, r27, r0
; CHECK-NEXT: cmplwi r0, 1

View File

@ -91,14 +91,12 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) nounwind {
; CHECK-P9-NEXT: bl bar
; CHECK-P9-NEXT: nop
; CHECK-P9-NEXT: mr r30, r3
; CHECK-P9-NEXT: extsw r3, r28
; CHECK-P9-NEXT: mulld r4, r3, r27
; CHECK-P9-NEXT: rldicl r5, r4, 1, 63
; CHECK-P9-NEXT: rldicl r4, r4, 32, 32
; CHECK-P9-NEXT: add r4, r4, r5
; CHECK-P9-NEXT: slwi r5, r4, 1
; CHECK-P9-NEXT: add r4, r4, r5
; CHECK-P9-NEXT: subf r3, r4, r3
; CHECK-P9-NEXT: mulhw r3, r28, r27
; CHECK-P9-NEXT: srwi r4, r3, 31
; CHECK-P9-NEXT: add r3, r3, r4
; CHECK-P9-NEXT: slwi r4, r3, 1
; CHECK-P9-NEXT: add r3, r3, r4
; CHECK-P9-NEXT: subf r3, r3, r28
; CHECK-P9-NEXT: cmplwi r3, 1
; CHECK-P9-NEXT: beq cr0, .LBB1_1
; CHECK-P9-NEXT: # %bb.5: # %while.cond

View File

@ -205,13 +205,13 @@ entry:
ret i32 %rem
; CHECK-LABEL: modulo_const3_sw
; CHECK-NOT: modsw
; CHECK: mull
; CHECK: mulh
; CHECK-NOT: modsw
; CHECK: sub
; CHECK-NOT: modsw
; CHECK: blr
; CHECK-PWR8-LABEL: modulo_const3_sw
; CHECK-PWR8: mull
; CHECK-PWR8: mulh
; CHECK-PWR8: sub
; CHECK-PWR8: blr
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff