[AArch64][GlobalISel] NFC: Refactor G_FCMP selection code

Refactor this so it's similar to the existing integer comparison code.

Also add some missing 64-bit testcases to select-fcmp.mir.

Refactoring to prep for improving selection for G_FCMP-related conditional
branches etc.

Differential Revision: https://reviews.llvm.org/D88614
This commit is contained in:
Jessica Paquette 2020-09-29 18:23:02 -07:00
parent e4f50e587f
commit bc43ddf42f
2 changed files with 139 additions and 84 deletions

View File

@ -172,6 +172,11 @@ private:
emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const; MachineIRBuilder &MIRBuilder) const;
/// Emit a floating point comparison between \p LHS and \p RHS.
///
/// If \p RHS is a constant +0.0, the immediate form of the compare is used and
/// \p RHS is not added as an operand.
///
/// \returns the emitted compare, or nullptr if the type of \p LHS is a vector
/// or is not 32 or 64 bits wide.
MachineInstr *emitFPCompare(Register LHS, Register RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitInstr(unsigned Opcode, MachineInstr *emitInstr(unsigned Opcode,
std::initializer_list<llvm::DstOp> DstOps, std::initializer_list<llvm::DstOp> DstOps,
std::initializer_list<llvm::SrcOp> SrcOps, std::initializer_list<llvm::SrcOp> SrcOps,
@ -238,9 +243,16 @@ private:
MachineInstr *emitFMovForFConstant(MachineInstr &MI, MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const; MachineRegisterInfo &MRI) const;
/// Emit a CSet for a compare. /// Emit a CSet for an integer compare.
///
/// \p DefReg is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const; MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
///
/// \p Pred may map to either one or two AArch64 condition codes. With one, a
/// single CSet writes \p Dst. With two, a CSet is emitted for each condition
/// code and the two results are ORed together into \p Dst.
///
/// \returns the instruction which defines \p Dst.
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
MachineIRBuilder &MIRBuilder) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero". /// \p IsNegative is true if the test should be "not zero".
@ -998,20 +1010,6 @@ static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
return 0; return 0;
} }
/// Pick the AArch64 compare opcode used to select the G_FCMP \p I.
///
/// The operand size is read from operand 2 of \p I. When operand 3 is a
/// constant +0.0, the immediate form of the compare is chosen so that no
/// constant has to be materialized.
///
/// \returns the opcode, or 0 if the operand is neither 32 nor 64 bits wide.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // Only +0.0 (not -0.0) can be expressed by the immediate compare forms.
  const ConstantFP *RHSCst =
      getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  const bool UseZeroImm = RHSCst && RHSCst->isZero() && !RHSCst->isNegative();

  switch (MRI.getType(I.getOperand(2).getReg()).getSizeInBits()) {
  case 32:
    return UseZeroImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
  case 64:
    return UseZeroImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
  default:
    // Unsupported operand size; the caller treats 0 as "cannot select".
    return 0;
  }
}
/// Returns true if \p P is an unsigned integer comparison predicate. /// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) { static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
switch (P) { switch (P) {
@ -2882,64 +2880,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
} }
case TargetOpcode::G_FCMP: { case TargetOpcode::G_FCMP: {
if (Ty != LLT::scalar(32)) { MachineIRBuilder MIRBuilder(I);
LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty CmpInst::Predicate Pred =
<< ", expected: " << LLT::scalar(32) << '\n'); static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
MIRBuilder) ||
!emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
return false; return false;
}
unsigned CmpOpc = selectFCMPOpc(I, MRI);
if (!CmpOpc)
return false;
// FIXME: regbank
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(
(CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
// Partially build the compare. Decide if we need to add a use for the
// third operand based off whether or not we're comparing against 0.0.
auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addUse(I.getOperand(2).getReg());
// If we don't have an immediate compare, then we need to add a use of the
// register which wasn't used for the immediate.
// Note that the immediate will always be the last operand.
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
const Register DefReg = I.getOperand(0).getReg();
Register Def1Reg = DefReg;
if (CC2 != AArch64CC::AL)
Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstr &CSetMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
.addDef(Def1Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstr &CSet2MI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
.addDef(Def2Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
.addImm(getInvertedCondCode(CC2));
MachineInstr &OrMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
.addDef(DefReg)
.addUse(Def1Reg)
.addUse(Def2Reg);
constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
}
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
I.eraseFromParent(); I.eraseFromParent();
return true; return true;
} }
@ -3984,6 +3931,66 @@ AArch64InstructionSelector::emitIntegerCompare(
return {&*CmpMI, P}; return {&*CmpMI, P};
} }
/// Materialize the result of a floating point compare with predicate \p Pred
/// into \p Dst, which is expected to be a 32-bit scalar register.
///
/// \p Pred is translated into one or two AArch64 condition codes. If one
/// suffices, a single CSINCWr defining \p Dst is emitted. Otherwise a CSINCWr
/// is emitted per condition code into fresh GPR32 virtual registers and the
/// two results are combined into \p Dst with an ORRWrr.
///
/// \returns the instruction which defines \p Dst.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif

  // SecondCC stays AL when a single condition code describes the predicate.
  AArch64CC::CondCode FirstCC, SecondCC;
  changeFCMPPredToAArch64CC(Pred, FirstCC, SecondCC);

  // Builds "CSINCWr Out, WZR, WZR, <inverted CC>", i.e. a CSET of CC into Out.
  auto BuildCSet = [this, &MIRBuilder](Register Out,
                                       AArch64CC::CondCode CC) -> MachineInstr * {
    const Register Zero = AArch64::WZR;
    auto MIB = MIRBuilder.buildInstr(AArch64::CSINCWr, {Out}, {Zero, Zero});
    MIB.addImm(getInvertedCondCode(CC));
    constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
    return &*MIB;
  };

  if (SecondCC == AArch64CC::AL)
    return BuildCSet(Dst, FirstCC);

  // Two condition codes: compute each one separately, then OR the results.
  const TargetRegisterClass *GPR32 = &AArch64::GPR32RegClass;
  Register FirstRes = MRI.createVirtualRegister(GPR32);
  Register SecondRes = MRI.createVirtualRegister(GPR32);
  BuildCSet(FirstRes, FirstCC);
  BuildCSet(SecondRes, SecondCC);
  auto Or =
      MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {FirstRes, SecondRes});
  constrainSelectedInstRegOperands(*Or, TII, TRI, RBI);
  return &*Or;
}
/// Emit a scalar floating point compare of \p LHS against \p RHS.
///
/// Only 32-bit and 64-bit scalar operands are handled. If \p RHS is the
/// constant +0.0, the immediate form of the compare is emitted and \p RHS is
/// not added as an operand.
///
/// \returns the emitted compare, or nullptr (with nothing emitted) if the type
/// of \p LHS is a vector or has an unsupported size.
MachineInstr *
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
                                          MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  const LLT OpTy = MRI.getType(LHS);
  if (OpTy.isVector())
    return nullptr;

  const unsigned Bits = OpTy.getSizeInBits();
  const bool Is64Bit = Bits == 64;
  if (!Is64Bit && Bits != 32)
    return nullptr;

  // A compare against +0.0 (but not -0.0) does not need the constant to be
  // materialized in a register.
  const ConstantFP *RHSCst = getConstantFPVRegVal(RHS, MRI);
  const bool UseZeroImm = RHSCst && RHSCst->isZero() && !RHSCst->isNegative();

  unsigned Opc;
  if (UseZeroImm)
    Opc = Is64Bit ? AArch64::FCMPDri : AArch64::FCMPSri;
  else
    Opc = Is64Bit ? AArch64::FCMPDrr : AArch64::FCMPSrr;

  // The immediate forms take only the LHS register; the register-register
  // forms additionally use RHS.
  auto Cmp = MIRBuilder.buildInstr(Opc);
  Cmp.addUse(LHS);
  if (!UseZeroImm)
    Cmp.addUse(RHS);
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  return &*Cmp;
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat( MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Optional<Register> Dst, Register Op1, Register Op2, Optional<Register> Dst, Register Op1, Register Op2,
MachineIRBuilder &MIRBuilder) const { MachineIRBuilder &MIRBuilder) const {
@ -4169,10 +4176,10 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
CondCode = changeICMPPredToAArch64CC(Pred); CondCode = changeICMPPredToAArch64CC(Pred);
} else { } else {
// Get the condition code for the select. // Get the condition code for the select.
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
AArch64CC::CondCode CondCode2; AArch64CC::CondCode CondCode2;
changeFCMPPredToAArch64CC( changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
(CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to something other than AL when // changeFCMPPredToAArch64CC sets CondCode2 to something other than AL when
// we require two instructions to emit the comparison. // we require two instructions to emit the comparison.
@ -4181,16 +4188,11 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
if (CondCode2 != AArch64CC::AL) if (CondCode2 != AArch64CC::AL)
return false; return false;
// Make sure we'll be able to select the compare. if (!emitFPCompare(CondDef->getOperand(2).getReg(),
unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI); CondDef->getOperand(3).getReg(), MIB)) {
if (!CmpOpc) LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false; return false;
}
// Emit a new compare.
auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
Cmp.addUse(CondDef->getOperand(3).getReg());
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
} }
// Emit the select. // Emit the select.

View File

@ -54,3 +54,56 @@ body: |
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2 %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
$s0 = COPY %3(s32) $s0 = COPY %3(s32)
RET_ReallyLR implicit $s0 RET_ReallyLR implicit $s0
...
---
# Selection of a 64-bit G_FCMP whose RHS is a non-zero constant (1.0). The
# constant has to be materialized, so the register-register compare FCMPDrr is
# expected.
name: notzero_s64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.1:
liveins: $d0, $d1
; CHECK-LABEL: name: notzero_s64
; CHECK: liveins: $d0, $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 112
; CHECK: FCMPDrr [[COPY]], [[FMOVDi]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $s0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 1.000000e+00
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
$s0 = COPY %3(s32)
RET_ReallyLR implicit $s0
...
---
# Selection of a 64-bit G_FCMP against +0.0. The immediate compare FCMPDri is
# expected, with no instruction materializing the constant.
name: zero_s64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1, $s0
; CHECK-LABEL: name: zero_s64
; CHECK: liveins: $d0, $d1, $s0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $s0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s64) = COPY $d0
%1:fpr(s64) = COPY $d1
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
$s0 = COPY %3(s32)
RET_ReallyLR implicit $s0