[AArch64][GlobalISel] Fold 64-bit cmps with 64-bit adds

G_ICMP is selected to an arithmetic overflow op (ADDS/SUBS/etc) with a dead
destination + a CSINC instruction.

We have a fold which allows us to combine 32-bit adds with G_ICMP.

The problem with G_ICMP is that we model it as always having a 32-bit
destination even though it can be a 64-bit operation. So, we were missing some
opportunities for 64-bit folds.

This patch teaches the fold to recognize 64-bit G_ICMPs and refactors some of the surrounding CSINC-emission code accordingly.

(Later down the line, I think we should probably change the way we handle G_ICMP
in general.)

Differential Revision: https://reviews.llvm.org/D111088
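
To make the fold concrete: CSINC computes dst = cc ? src1 : src2 + 1, so routing the add's other operand into both sources and using the inverse of the compare's condition gives exactly addend + (cmp ? 1 : 0). Below is a minimal standalone sketch of that identity (plain C++ with illustrative helper names, not LLVM code):

#include <cassert>
#include <cstdint>

// Standalone model of AArch64 CSINC semantics (not LLVM code):
//   CSINC dst, a, b, cc   ==>   dst = cc ? a : b + 1
static uint64_t csinc(bool cc, uint64_t a, uint64_t b) {
  return cc ? a : b + 1;
}

// addend + (lhs == rhs) is equivalent to CSINC(addend, addend, !(lhs == rhs)),
// i.e. a conditional increment on the inverted condition.
static uint64_t addOfCmpFolded(uint64_t lhs, uint64_t rhs, uint64_t addend) {
  bool cmp = (lhs == rhs);
  return csinc(/*cc=*/!cmp, addend, addend);
}

int main() {
  assert(addOfCmpFolded(7, 7, 40) == 41); // compare true: addend + 1
  assert(addOfCmpFolded(7, 9, 40) == 40); // compare false: addend + 0
  return 0;
}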
Jessica Paquette 2021-10-04 11:53:37 -07:00
parent 2ba572a82a
commit 5dc339d982
2 changed files with 285 additions and 139 deletions


@@ -276,13 +276,9 @@ private:
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for an integer compare.
///
/// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder,
Register SrcReg = AArch64::WZR) const;
MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
AArch64CC::CondCode Pred,
MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -2213,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
Register X = I.getOperand(1).getReg();
// Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
// early if we see it.
LLT Ty = MRI.getType(X);
if (Ty.isVector() || Ty.getSizeInBits() != 32)
Register AddDst = I.getOperand(0).getReg();
Register AddLHS = I.getOperand(1).getReg();
Register AddRHS = I.getOperand(2).getReg();
// Only handle scalars.
LLT Ty = MRI.getType(AddLHS);
if (Ty.isVector())
return false;
Register CmpReg = I.getOperand(2).getReg();
MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
// Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
// bits.
unsigned Size = Ty.getSizeInBits();
if (Size != 32 && Size != 64)
return false;
auto MatchCmp = [&](Register Reg) -> MachineInstr * {
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
// If the LHS of the add is 32 bits, then we want to fold a 32-bit
// compare.
if (Size == 32)
return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
// We model scalar compares using 32-bit destinations right now.
// If it's a 64-bit compare, it'll have 64-bit sources.
Register ZExt;
if (!mi_match(Reg, MRI,
m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
return nullptr;
auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
if (!Cmp ||
MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
return nullptr;
return Cmp;
};
// Try to match
// z + (cmp pred, x, y)
MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
std::swap(X, CmpReg);
Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
// (cmp pred, x, y) + z
std::swap(AddLHS, AddRHS);
Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
auto Pred =
static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
Cmp->getOperand(1), MIB);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
auto &PredOp = Cmp->getOperand(1);
auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
const AArch64CC::CondCode InvCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
MIB.setInstrAndDebugLoc(I);
emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
/*RHS=*/Cmp->getOperand(3), PredOp, MIB);
emitCSINC(/*Dst=*/AddDst, /*Src1=*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
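
A note on the condition immediates that show up in the MIR checks further down: AArch64 encodes condition codes so that inverting a condition toggles the low bit of its encoding (EQ = 0 and NE = 1, HI = 8 and LS = 9, and so on), which is why the InvCC computed above turns an eq compare into a CSINC with condition 1. A small standalone sketch of that numbering (illustrative enum and helper, not LLVM's AArch64CC header):

#include <cassert>

// Standalone sketch of the AArch64 condition-code encodings referenced by the
// MIR tests below. The enum and helper are illustrative, not LLVM code.
enum CondCode {
  EQ = 0, NE = 1, HS = 2, LO = 3, MI = 4, PL = 5, VS = 6, VC = 7,
  HI = 8, LS = 9, GE = 10, LT = 11, GT = 12, LE = 13
};

// Inverting a condition toggles the low bit of its encoding.
constexpr CondCode invert(CondCode CC) {
  return static_cast<CondCode>(static_cast<unsigned>(CC) ^ 1);
}

int main() {
  // An eq compare is selected as CSINC on the inverted condition NE (1),
  // which is the "1" in CHECK lines such as: CSINCWr $wzr, $wzr, 1.
  assert(invert(EQ) == NE);
  // ugt maps to HI; its inverse LS (9) shows up as: CSINCWr $wzr, $wzr, 9.
  assert(invert(HI) == LS);
  return 0;
}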
@@ -2963,10 +2987,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
{ZReg, ZReg})
.addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
@@ -3303,9 +3325,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
MIB);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
const AArch64CC::CondCode InvCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
/*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -4451,25 +4475,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?");
#endif
const Register ZeroReg = AArch64::WZR;
auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
auto CSet =
MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
.addImm(getInvertedCondCode(CC));
constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
return &*CSet;
};
const Register ZReg = AArch64::WZR;
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
if (CC2 == AArch64CC::AL)
return EmitCSet(Dst, CC1);
return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
MIRBuilder);
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
Register Def1Reg = MRI.createVirtualRegister(RC);
Register Def2Reg = MRI.createVirtualRegister(RC);
EmitCSet(Def1Reg, CC1);
EmitCSet(Def2Reg, CC2);
auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
return &*OrMI;
@@ -4578,16 +4596,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
}
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder,
Register SrcReg) const {
// CSINC increments the result when the predicate is false. Invert it.
const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
.addImm(InvCC);
constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
return &*I;
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
Register Src2, AArch64CC::CondCode Pred,
MachineIRBuilder &MIRBuilder) const {
auto &MRI = *MIRBuilder.getMRI();
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
// If we used a register class, then this won't necessarily have an LLT.
// Compute the size based off whether or not we have a class or bank.
unsigned Size;
if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
Size = TRI.getRegSizeInBits(*RC);
else
Size = MRI.getType(Dst).getSizeInBits();
// Some opcodes use s1.
assert(Size <= 64 && "Expected 64 bits or less only!");
static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
unsigned Opc = OpcTable[Size == 64];
auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
return &*CSINC;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
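
emitCSINC now derives the operation size from the destination's register class or LLT and picks the W- or X-register form accordingly. A standalone sketch of that dispatch (string opcode names and a hypothetical helper, not LLVM code):

#include <cassert>
#include <string>

// Standalone sketch of emitCSINC's opcode choice (not LLVM code): anything up
// to 32 bits uses the W-register form, a 64-bit destination uses the X form.
static std::string csincOpcodeForSize(unsigned SizeInBits) {
  assert(SizeInBits <= 64 && "Expected 64 bits or less only!");
  return SizeInBits == 64 ? "CSINCXr" : "CSINCWr";
}

int main() {
  assert(csincOpcodeForSize(1) == "CSINCWr");  // some opcodes use s1
  assert(csincOpcodeForSize(32) == "CSINCWr");
  assert(csincOpcodeForSize(64) == "CSINCXr");
  return 0;
}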


@@ -11,11 +11,12 @@ body: |
; CHECK-LABEL: name: cmp_imm_32
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 42
%5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %1
@@ -34,11 +35,12 @@ body: |
; CHECK-LABEL: name: cmp_imm_64
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 42
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
@@ -57,13 +59,14 @@ body: |
; CHECK-LABEL: name: cmp_imm_out_of_range
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 13132
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
@@ -81,11 +84,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_imm_lookthrough
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s64) = G_CONSTANT i64 42
%2:gpr(s32) = G_TRUNC %1(s64)
@@ -104,11 +108,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_imm_lookthrough_bad_trunc
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY [[CSINCWr]]
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr(s32) = COPY $w0
%1:gpr(s64) = G_CONSTANT i64 68719476736 ; 0x1000000000
%2:gpr(s32) = G_TRUNC %1(s64) ; Value truncates to 0
@@ -127,11 +132,12 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_neg_imm_32
; CHECK: liveins: $w0
; CHECK: %reg0:gpr32sp = COPY $w0
; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %reg0:gpr32sp = COPY $w0
; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%cst:gpr(s32) = G_CONSTANT i32 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
@@ -149,11 +155,12 @@ body: |
liveins: $x0
; CHECK-LABEL: name: cmp_neg_imm_64
; CHECK: liveins: $x0
; CHECK: %reg0:gpr64sp = COPY $x0
; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s64) = COPY $x0
%cst:gpr(s64) = G_CONSTANT i64 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
@@ -171,12 +178,13 @@ body: |
liveins: $w0
; CHECK-LABEL: name: cmp_neg_imm_invalid
; CHECK: liveins: $w0
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %cst:gpr32 = MOVi32imm -5000
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
; CHECK-NEXT: %cst:gpr32 = MOVi32imm -5000
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%cst:gpr(s32) = G_CONSTANT i32 -5000
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
@@ -194,12 +202,13 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_s64
; CHECK: liveins: $w0, $x1
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %reg1:gpr64sp = COPY $x1
; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
; CHECK-NEXT: %reg1:gpr64sp = COPY $x1
; CHECK-NEXT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
@@ -221,14 +230,15 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_s32
; CHECK: liveins: $w0, $w1, $h0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: %reg1:gpr32sp = COPY $w1
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
; CHECK-NEXT: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: %reg1:gpr32sp = COPY $w1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
; CHECK-NEXT: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s16) = COPY $h0
%reg1:gpr(s32) = COPY $w1
%ext:gpr(s32) = G_ZEXT %reg0(s16)
@@ -252,14 +262,15 @@ body: |
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
; CHECK: liveins: $w0, $x1
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %reg1:gpr64 = COPY $x1
; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
; CHECK: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
; CHECK-NEXT: %reg1:gpr64 = COPY $x1
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
; CHECK-NEXT: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
; CHECK-NEXT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %cmp
; CHECK-NEXT: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
@@ -284,13 +295,14 @@ body: |
; CHECK-LABEL: name: cmp_add_rhs
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %cmp_lhs:gpr32 = COPY $w0
; CHECK: %cmp_rhs:gpr32 = COPY $w1
; CHECK: %add_rhs:gpr32 = COPY $w2
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
; CHECK: $w0 = COPY %add
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
; CHECK-NEXT: %add_rhs:gpr32 = COPY $w2
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%cmp_lhs:gpr(s32) = COPY $w0
%cmp_rhs:gpr(s32) = COPY $w1
%add_rhs:gpr(s32) = COPY $w2
@@ -314,13 +326,14 @@ body: |
; CHECK-LABEL: name: cmp_add_lhs
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %cmp_lhs:gpr32 = COPY $w0
; CHECK: %cmp_rhs:gpr32 = COPY $w1
; CHECK: %add_lhs:gpr32 = COPY $w2
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
; CHECK: $w0 = COPY %add
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
; CHECK-NEXT: %add_lhs:gpr32 = COPY $w2
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%cmp_lhs:gpr(s32) = COPY $w0
%cmp_rhs:gpr(s32) = COPY $w1
%add_lhs:gpr(s32) = COPY $w2
@@ -344,13 +357,14 @@ body: |
; CHECK-LABEL: name: cmp_add_lhs_vector
; CHECK: liveins: $q0, $q1, $q2
; CHECK: %cmp_lhs:fpr128 = COPY $q0
; CHECK: %cmp_rhs:fpr128 = COPY $q1
; CHECK: %add_lhs:fpr128 = COPY $q2
; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
; CHECK: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
; CHECK: $q0 = COPY %add
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0
; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1
; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2
; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
; CHECK-NEXT: $q0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $q0
%cmp_lhs:fpr(<4 x s32>) = COPY $q0
%cmp_rhs:fpr(<4 x s32>) = COPY $q1
%add_lhs:fpr(<4 x s32>) = COPY $q2
@@ -358,3 +372,108 @@ body: |
%add:fpr(<4 x s32>) = G_ADD %add_lhs, %cmp
$q0 = COPY %add(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: cmp_add_rhs_64
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
; The CSINC should use the add's RHS.
; CHECK-LABEL: name: cmp_add_rhs_64
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %add:gpr64 = CSINCXr %add_rhs, %add_rhs, 1, implicit $nzcv
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $x0
%cmp_lhs:gpr(s64) = COPY $x0
%cmp_rhs:gpr(s64) = COPY $x1
%add_rhs:gpr(s64) = COPY $x2
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
%cmp_ext:gpr(s64) = G_ZEXT %cmp
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
$x0 = COPY %add(s64)
RET_ReallyLR implicit $x0
...
---
name: cmp_add_rhs_64_zext_multi_use
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
; The ZExt is used more than once so don't fold.
; CHECK-LABEL: name: cmp_add_rhs_64_zext_multi_use
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext
; CHECK-NEXT: $x0 = COPY %or
; CHECK-NEXT: RET_ReallyLR implicit $x0
%cmp_lhs:gpr(s64) = COPY $x0
%cmp_rhs:gpr(s64) = COPY $x1
%add_rhs:gpr(s64) = COPY $x2
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
%cmp_ext:gpr(s64) = G_ZEXT %cmp
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
%or:gpr(s64) = G_OR %add, %cmp_ext
$x0 = COPY %or(s64)
RET_ReallyLR implicit $x0
...
---
name: cmp_add_rhs_64_cmp_multi_use
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
; The cmp is used more than once so don't fold.
; CHECK-LABEL: name: cmp_add_rhs_64_cmp_multi_use
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32
; CHECK-NEXT: %cmp_ext2:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 31
; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext2
; CHECK-NEXT: $x0 = COPY %or
; CHECK-NEXT: RET_ReallyLR implicit $x0
%cmp_lhs:gpr(s64) = COPY $x0
%cmp_rhs:gpr(s64) = COPY $x1
%add_rhs:gpr(s64) = COPY $x2
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
%cmp_ext:gpr(s64) = G_ZEXT %cmp
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
%cmp_ext2:gpr(s64) = G_SEXT %cmp
%or:gpr(s64) = G_OR %add, %cmp_ext2
$x0 = COPY %or(s64)
RET_ReallyLR implicit $x0