Revert "Revert "[AArch64][GlobalISel] Optimize G_FCMP + G_SELECT pairs when G_SELECT is fp""

When looking through copies, make sure not to try to find the vreg def of a physreg.
Normally getVRegDef will return nullptr in this case, but if there happen to be
multiple defs then it will assert.

This fixes PR42129.

llvm-svn: 362666
This commit is contained in:
Amara Emerson 2019-06-05 23:46:16 +00:00
parent 34c8b835b1
commit c37ff0d138
2 changed files with 447 additions and 8 deletions

View File

@ -172,6 +172,7 @@ private:
bool tryOptVectorShuffle(MachineInstr &I) const;
bool tryOptVectorDup(MachineInstr &MI) const;
bool tryOptSelect(MachineInstr &MI) const;
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
@ -741,6 +742,19 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
/// Pick the conditional-select opcode for the G_SELECT \p I.
///
/// The choice depends on the destination register (operand 0) of \p I: its
/// register bank decides between the floating-point (FCSEL*) and integer
/// (CSEL*) forms, and its LLT decides between the 32-bit and 64-bit variants.
/// 64-bit pointers in address space 0 are handled like 64-bit scalars.
///
/// \returns the selected AArch64 opcode, or 0 if the destination type is not
/// one of the supported types.
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // Anything that does not live on the GPR bank gets a floating-point select.
  const bool UseFPSelect =
      RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::GPRRegBankID;
  const LLT DstTy = MRI.getType(DstReg);

  if (DstTy == LLT::scalar(32))
    return UseFPSelect ? AArch64::FCSELSrrr : AArch64::CSELWr;

  if (DstTy == LLT::scalar(64) || DstTy == LLT::pointer(0, 64))
    return UseFPSelect ? AArch64::FCSELDrrr : AArch64::CSELXr;

  // Unsupported destination type.
  return 0;
}
/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
// If this is a compare against +0.0, then we don't have to explicitly
@ -1774,16 +1788,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
// select instead of an integer select.
bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
AArch64::GPRRegBankID);
unsigned CSelOpc = 0;
if (Ty == LLT::scalar(32)) {
CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
} else {
return false;
}
if (IsFP && tryOptSelect(I))
return true;
unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
MachineInstr &TstMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
.addDef(AArch64::WZR)
@ -2810,6 +2819,85 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
return &I;
}
/// Try to fold a G_FCMP into the G_SELECT \p I that consumes it, emitting a
/// single compare followed by a conditional select on the resulting flags.
///
/// \returns true if \p I was replaced by the folded sequence and erased;
/// false if the pattern did not match, in which case no instruction has been
/// emitted or removed.
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
      return false;

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs. getVRegDef is only meaningful for
    // virtual registers: it normally returns nullptr for a physreg, but it
    // asserts if the physreg happens to have multiple defs.
    unsigned SrcReg = CondDef->getOperand(1).getReg();
    if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
      return false;

    CondDef = MRI.getVRegDef(SrcReg);
  }

  // Is the condition defined by a compare?
  // TODO: Handle G_ICMP.
  if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
    return false;

  // Get the condition code for the select.
  AArch64CC::CondCode CondCode;
  AArch64CC::CondCode CondCode2;
  changeFCMPPredToAArch64CC(
      (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
      CondCode2);

  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
  // instructions to emit the comparison.
  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
  // unnecessary.
  if (CondCode2 != AArch64CC::AL)
    return false;

  // Make sure we'll be able to select the compare.
  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
  if (!CmpOpc)
    return false;

  // Make sure we'll be able to select the select as well. This has to happen
  // before anything is emitted so that a failure leaves the function intact;
  // selectSelectOpc returns 0 for destination types it does not support.
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  if (!CSelOpc)
    return false;

  // Emit a new compare. The immediate forms compare against +0.0 and take
  // only the LHS; every other form needs the RHS register too.
  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
    Cmp.addUse(CondDef->getOperand(3).getReg());

  // Emit the select.
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
// Try to match a vector splat operation into a dup instruction.
// We're looking for this pattern:

View File

@ -0,0 +1,351 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Verify the following:
#
# - We can fold compares into selects.
# - This only happens when the result of the compare is only used by the select.
#
# Also verify that, for now:
#
# - We only support doing this with G_FCMP.
# - We only support condition flags that require a single instruction.
#
...
---
name: fcmp_more_than_one_user_no_fold
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $s1, $w1
; CHECK-LABEL: name: fcmp_more_than_one_user_no_fold
; CHECK: liveins: $s0, $s1, $w1
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
; CHECK: $w1 = COPY [[CSINCWr]]
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = COPY $s1
%2:fpr(s32) = G_FCONSTANT float 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %6(s1), %2, %1
$w1 = COPY %5(s32)
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: using_icmp
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $w0
; CHECK-LABEL: name: using_icmp
; CHECK: liveins: $s0, $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:gpr(s32) = COPY $w0
%1:fpr(s32) = COPY $s0
%2:gpr(s32) = G_CONSTANT i32 0
%5:fpr(s32) = G_FCONSTANT float 0.000000e+00
%6:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
%3:gpr(s1) = G_TRUNC %6(s32)
%7:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %7(s1), %1, %5
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: foeq
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $s1
; CHECK-LABEL: name: foeq
; CHECK: liveins: $s0, $s1
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 0, implicit $nzcv
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = COPY $s1
%2:fpr(s32) = G_FCONSTANT float 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %6(s1), %2, %1
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: fueq
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $s1
; CHECK-LABEL: name: fueq
; CHECK: liveins: $s0, $s1
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = COPY $s1
%2:fpr(s32) = G_FCONSTANT float 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(ueq), %0(s32), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %6(s1), %2, %1
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: fone
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $s1
; CHECK-LABEL: name: fone
; CHECK: liveins: $s0, $s1
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = COPY $s1
%2:fpr(s32) = G_FCONSTANT float 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(one), %0(s32), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %6(s1), %1, %2
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: fune
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $s0, $s1
; CHECK-LABEL: name: fune
; CHECK: liveins: $s0, $s1
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
; CHECK: $s0 = COPY [[FCSELSrrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = COPY $s1
%2:fpr(s32) = G_FCONSTANT float 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(une), %0(s32), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s32) = G_SELECT %6(s1), %1, %2
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
---
name: doeq
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: doeq
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 0, implicit $nzcv
; CHECK: $d0 = COPY [[FCSELDrrr]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s64) = G_SELECT %6(s1), %2, %1
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
---
name: dueq
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: dueq
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 1, implicit $nzcv
; CHECK: $d0 = COPY [[FCSELDrrr]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(ueq), %0(s64), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s64) = G_SELECT %6(s1), %2, %1
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
---
name: done
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: done
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
; CHECK: $d0 = COPY [[FCSELDrrr]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(one), %0(s64), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s64) = G_SELECT %6(s1), %1, %2
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
---
name: dune
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
; CHECK-LABEL: name: dune
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
; CHECK: $d0 = COPY [[FCSELDrrr]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
%5:gpr(s32) = G_FCMP floatpred(une), %0(s64), %2
%3:gpr(s1) = G_TRUNC %5(s32)
%6:fpr(s1) = COPY %3(s1)
%4:fpr(s64) = G_SELECT %6(s1), %1, %2
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...