forked from OSchip/llvm-project
[AArch64][GlobalISel] Fold 64-bit cmps with 64-bit adds
G_ICMP is selected to an arithmetic overflow op (ADDS/SUBS/etc.) with a dead destination, followed by a CSINC instruction. We have a fold which allows us to combine 32-bit adds with G_ICMP. The problem with G_ICMP is that we model it as always having a 32-bit destination, even though the comparison itself can be a 64-bit operation (in which case a 64-bit add sees the compare through a G_ZEXT). As a result, we were missing some opportunities for 64-bit folds. This patch teaches the fold to recognize 64-bit G_ICMPs and refactors some of the code surrounding CSINC accordingly. (Later down the line, I think we should probably change the way we handle G_ICMP in general.) Differential Revision: https://reviews.llvm.org/D111088
This commit is contained in:
parent
2ba572a82a
commit
5dc339d982
|
@ -276,13 +276,9 @@ private:
|
|||
const RegisterBank &DstRB, LLT ScalarTy,
|
||||
Register VecReg, unsigned LaneIdx,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
|
||||
/// Emit a CSet for an integer compare.
|
||||
///
|
||||
/// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
|
||||
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
|
||||
MachineIRBuilder &MIRBuilder,
|
||||
Register SrcReg = AArch64::WZR) const;
|
||||
MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
|
||||
AArch64CC::CondCode Pred,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
/// Emit a CSet for a FP compare.
|
||||
///
|
||||
/// \p Dst is expected to be a 32-bit scalar register.
|
||||
|
@ -2213,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
|
|||
// fold the add into the cset for the cmp by using cinc.
|
||||
//
|
||||
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
|
||||
Register X = I.getOperand(1).getReg();
|
||||
|
||||
// Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
|
||||
// early if we see it.
|
||||
LLT Ty = MRI.getType(X);
|
||||
if (Ty.isVector() || Ty.getSizeInBits() != 32)
|
||||
Register AddDst = I.getOperand(0).getReg();
|
||||
Register AddLHS = I.getOperand(1).getReg();
|
||||
Register AddRHS = I.getOperand(2).getReg();
|
||||
// Only handle scalars.
|
||||
LLT Ty = MRI.getType(AddLHS);
|
||||
if (Ty.isVector())
|
||||
return false;
|
||||
|
||||
Register CmpReg = I.getOperand(2).getReg();
|
||||
MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
|
||||
// Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
|
||||
// bits.
|
||||
unsigned Size = Ty.getSizeInBits();
|
||||
if (Size != 32 && Size != 64)
|
||||
return false;
|
||||
auto MatchCmp = [&](Register Reg) -> MachineInstr * {
|
||||
if (!MRI.hasOneNonDBGUse(Reg))
|
||||
return nullptr;
|
||||
// If the LHS of the add is 32 bits, then we want to fold a 32-bit
|
||||
// compare.
|
||||
if (Size == 32)
|
||||
return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
|
||||
// We model scalar compares using 32-bit destinations right now.
|
||||
// If it's a 64-bit compare, it'll have 64-bit sources.
|
||||
Register ZExt;
|
||||
if (!mi_match(Reg, MRI,
|
||||
m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
|
||||
return nullptr;
|
||||
auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
|
||||
if (!Cmp ||
|
||||
MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
|
||||
return nullptr;
|
||||
return Cmp;
|
||||
};
|
||||
// Try to match
|
||||
// z + (cmp pred, x, y)
|
||||
MachineInstr *Cmp = MatchCmp(AddRHS);
|
||||
if (!Cmp) {
|
||||
std::swap(X, CmpReg);
|
||||
Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
|
||||
// (cmp pred, x, y) + z
|
||||
std::swap(AddLHS, AddRHS);
|
||||
Cmp = MatchCmp(AddRHS);
|
||||
if (!Cmp)
|
||||
return false;
|
||||
}
|
||||
auto Pred =
|
||||
static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
|
||||
emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
|
||||
Cmp->getOperand(1), MIB);
|
||||
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
|
||||
auto &PredOp = Cmp->getOperand(1);
|
||||
auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
|
||||
const AArch64CC::CondCode InvCC =
|
||||
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
|
||||
MIB.setInstrAndDebugLoc(I);
|
||||
emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
|
||||
/*RHS=*/Cmp->getOperand(3), PredOp, MIB);
|
||||
emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
@ -2963,10 +2987,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
// false, so to get the increment when it's true, we need to use the
|
||||
// inverse. In this case, we want to increment when carry is set.
|
||||
Register ZReg = AArch64::WZR;
|
||||
auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
|
||||
{ZReg, ZReg})
|
||||
.addImm(getInvertedCondCode(OpAndCC.second));
|
||||
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
|
||||
emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
|
||||
getInvertedCondCode(OpAndCC.second), MIB);
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
@ -3303,9 +3325,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
|||
}
|
||||
|
||||
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
|
||||
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
|
||||
MIB);
|
||||
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
|
||||
const AArch64CC::CondCode InvCC =
|
||||
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
|
||||
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
|
||||
emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
|
||||
/*Src2=*/AArch64::WZR, InvCC, MIB);
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
@ -4451,25 +4475,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
|
|||
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
|
||||
"Expected a 32-bit scalar register?");
|
||||
#endif
|
||||
const Register ZeroReg = AArch64::WZR;
|
||||
auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
|
||||
auto CSet =
|
||||
MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
|
||||
.addImm(getInvertedCondCode(CC));
|
||||
constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
|
||||
return &*CSet;
|
||||
};
|
||||
|
||||
const Register ZReg = AArch64::WZR;
|
||||
AArch64CC::CondCode CC1, CC2;
|
||||
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
|
||||
auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
|
||||
if (CC2 == AArch64CC::AL)
|
||||
return EmitCSet(Dst, CC1);
|
||||
|
||||
return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
|
||||
MIRBuilder);
|
||||
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
|
||||
Register Def1Reg = MRI.createVirtualRegister(RC);
|
||||
Register Def2Reg = MRI.createVirtualRegister(RC);
|
||||
EmitCSet(Def1Reg, CC1);
|
||||
EmitCSet(Def2Reg, CC2);
|
||||
auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
|
||||
emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
|
||||
emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
|
||||
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
|
||||
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
|
||||
return &*OrMI;
|
||||
|
@ -4578,16 +4596,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
|
|||
}
|
||||
|
||||
MachineInstr *
|
||||
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
|
||||
MachineIRBuilder &MIRBuilder,
|
||||
Register SrcReg) const {
|
||||
// CSINC increments the result when the predicate is false. Invert it.
|
||||
const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
|
||||
CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
|
||||
auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
|
||||
.addImm(InvCC);
|
||||
constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
|
||||
return &*I;
|
||||
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
|
||||
Register Src2, AArch64CC::CondCode Pred,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
auto &MRI = *MIRBuilder.getMRI();
|
||||
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
|
||||
// If we used a register class, then this won't necessarily have an LLT.
|
||||
// Compute the size based off whether or not we have a class or bank.
|
||||
unsigned Size;
|
||||
if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
|
||||
Size = TRI.getRegSizeInBits(*RC);
|
||||
else
|
||||
Size = MRI.getType(Dst).getSizeInBits();
|
||||
// Some opcodes use s1.
|
||||
assert(Size <= 64 && "Expected 64 bits or less only!");
|
||||
static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
|
||||
unsigned Opc = OpcTable[Size == 64];
|
||||
auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
|
||||
constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
|
||||
return &*CSINC;
|
||||
}
|
||||
|
||||
std::pair<MachineInstr *, AArch64CC::CondCode>
|
||||
|
|
|
@ -11,11 +11,12 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_imm_32
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(s32) = G_CONSTANT i32 42
|
||||
%5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %1
|
||||
|
@ -34,11 +35,12 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_imm_64
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
|
||||
; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:gpr(s64) = COPY $x0
|
||||
%1:gpr(s64) = G_CONSTANT i64 42
|
||||
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
|
||||
|
@ -57,13 +59,14 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_imm_out_of_range
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
|
||||
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
|
||||
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
|
||||
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 13132
|
||||
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
|
||||
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[SUBREG_TO_REG]], implicit-def $nzcv
|
||||
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:gpr(s64) = COPY $x0
|
||||
%1:gpr(s64) = G_CONSTANT i64 13132
|
||||
%5:gpr(s32) = G_ICMP intpred(eq), %0(s64), %1
|
||||
|
@ -81,11 +84,12 @@ body: |
|
|||
liveins: $w0
|
||||
; CHECK-LABEL: name: cmp_imm_lookthrough
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 42, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(s64) = G_CONSTANT i64 42
|
||||
%2:gpr(s32) = G_TRUNC %1(s64)
|
||||
|
@ -104,11 +108,12 @@ body: |
|
|||
liveins: $w0
|
||||
; CHECK-LABEL: name: cmp_imm_lookthrough_bad_trunc
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
|
||||
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32sp = COPY $w0
|
||||
; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 0, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(s64) = G_CONSTANT i64 68719476736 ; 0x1000000000
|
||||
%2:gpr(s32) = G_TRUNC %1(s64) ; Value truncates to 0
|
||||
|
@ -127,11 +132,12 @@ body: |
|
|||
liveins: $w0
|
||||
; CHECK-LABEL: name: cmp_neg_imm_32
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %reg0:gpr32sp = COPY $w0
|
||||
; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %reg0:gpr32sp = COPY $w0
|
||||
; CHECK-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%cst:gpr(s32) = G_CONSTANT i32 -10
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
|
||||
|
@ -149,11 +155,12 @@ body: |
|
|||
liveins: $x0
|
||||
; CHECK-LABEL: name: cmp_neg_imm_64
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %reg0:gpr64sp = COPY $x0
|
||||
; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
|
||||
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s64) = COPY $x0
|
||||
%cst:gpr(s64) = G_CONSTANT i64 -10
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
|
||||
|
@ -171,12 +178,13 @@ body: |
|
|||
liveins: $w0
|
||||
; CHECK-LABEL: name: cmp_neg_imm_invalid
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %cst:gpr32 = MOVi32imm -5000
|
||||
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
|
||||
; CHECK-NEXT: %cst:gpr32 = MOVi32imm -5000
|
||||
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%cst:gpr(s32) = G_CONSTANT i32 -5000
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
|
||||
|
@ -194,12 +202,13 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_s64
|
||||
; CHECK: liveins: $w0, $x1
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %reg1:gpr64sp = COPY $x1
|
||||
; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
|
||||
; CHECK-NEXT: %reg1:gpr64sp = COPY $x1
|
||||
; CHECK-NEXT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%reg1:gpr(s64) = COPY $x1
|
||||
%ext:gpr(s64) = G_ZEXT %reg0(s32)
|
||||
|
@ -221,14 +230,15 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_s32
|
||||
; CHECK: liveins: $w0, $w1, $h0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
|
||||
; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: %reg1:gpr32sp = COPY $w1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
|
||||
; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
|
||||
; CHECK-NEXT: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK-NEXT: %reg1:gpr32sp = COPY $w1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
|
||||
; CHECK-NEXT: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s16) = COPY $h0
|
||||
%reg1:gpr(s32) = COPY $w1
|
||||
%ext:gpr(s32) = G_ZEXT %reg0(s16)
|
||||
|
@ -252,14 +262,15 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
|
||||
; CHECK: liveins: $w0, $x1
|
||||
; CHECK: %reg0:gpr32 = COPY $w0
|
||||
; CHECK: %reg1:gpr64 = COPY $x1
|
||||
; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
|
||||
; CHECK: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
|
||||
; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
|
||||
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %cmp
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %reg0:gpr32 = COPY $w0
|
||||
; CHECK-NEXT: %reg1:gpr64 = COPY $x1
|
||||
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
|
||||
; CHECK-NEXT: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
|
||||
; CHECK-NEXT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %cmp
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%reg0:gpr(s32) = COPY $w0
|
||||
%reg1:gpr(s64) = COPY $x1
|
||||
%ext:gpr(s64) = G_ZEXT %reg0(s32)
|
||||
|
@ -284,13 +295,14 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_add_rhs
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %cmp_lhs:gpr32 = COPY $w0
|
||||
; CHECK: %cmp_rhs:gpr32 = COPY $w1
|
||||
; CHECK: %add_rhs:gpr32 = COPY $w2
|
||||
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %add
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
|
||||
; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
|
||||
; CHECK-NEXT: %add_rhs:gpr32 = COPY $w2
|
||||
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK-NEXT: %add:gpr32 = CSINCWr %add_rhs, %add_rhs, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %add
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%cmp_lhs:gpr(s32) = COPY $w0
|
||||
%cmp_rhs:gpr(s32) = COPY $w1
|
||||
%add_rhs:gpr(s32) = COPY $w2
|
||||
|
@ -314,13 +326,14 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_add_lhs
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %cmp_lhs:gpr32 = COPY $w0
|
||||
; CHECK: %cmp_rhs:gpr32 = COPY $w1
|
||||
; CHECK: %add_lhs:gpr32 = COPY $w2
|
||||
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
|
||||
; CHECK: $w0 = COPY %add
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:gpr32 = COPY $w0
|
||||
; CHECK-NEXT: %cmp_rhs:gpr32 = COPY $w1
|
||||
; CHECK-NEXT: %add_lhs:gpr32 = COPY $w2
|
||||
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK-NEXT: %add:gpr32 = CSINCWr %add_lhs, %add_lhs, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $w0 = COPY %add
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%cmp_lhs:gpr(s32) = COPY $w0
|
||||
%cmp_rhs:gpr(s32) = COPY $w1
|
||||
%add_lhs:gpr(s32) = COPY $w2
|
||||
|
@ -344,13 +357,14 @@ body: |
|
|||
|
||||
; CHECK-LABEL: name: cmp_add_lhs_vector
|
||||
; CHECK: liveins: $q0, $q1, $q2
|
||||
; CHECK: %cmp_lhs:fpr128 = COPY $q0
|
||||
; CHECK: %cmp_rhs:fpr128 = COPY $q1
|
||||
; CHECK: %add_lhs:fpr128 = COPY $q2
|
||||
; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
|
||||
; CHECK: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
|
||||
; CHECK: $q0 = COPY %add
|
||||
; CHECK: RET_ReallyLR implicit $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0
|
||||
; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1
|
||||
; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2
|
||||
; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
|
||||
; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
|
||||
; CHECK-NEXT: $q0 = COPY %add
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%cmp_lhs:fpr(<4 x s32>) = COPY $q0
|
||||
%cmp_rhs:fpr(<4 x s32>) = COPY $q1
|
||||
%add_lhs:fpr(<4 x s32>) = COPY $q2
|
||||
|
@ -358,3 +372,108 @@ body: |
|
|||
%add:fpr(<4 x s32>) = G_ADD %add_lhs, %cmp
|
||||
$q0 = COPY %add(<4 x s32>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmp_add_rhs_64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
; The CSINC should use the add's RHS.
|
||||
; CHECK-LABEL: name: cmp_add_rhs_64
|
||||
; CHECK: liveins: $x0, $x1, $x2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
|
||||
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
|
||||
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
|
||||
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK-NEXT: %add:gpr64 = CSINCXr %add_rhs, %add_rhs, 1, implicit $nzcv
|
||||
; CHECK-NEXT: $x0 = COPY %add
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%cmp_lhs:gpr(s64) = COPY $x0
|
||||
%cmp_rhs:gpr(s64) = COPY $x1
|
||||
%add_rhs:gpr(s64) = COPY $x2
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
|
||||
%cmp_ext:gpr(s64) = G_ZEXT %cmp
|
||||
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
|
||||
$x0 = COPY %add(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmp_add_rhs_64_zext_multi_use
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
; The ZExt is used more than once so don't fold.
|
||||
; CHECK-LABEL: name: cmp_add_rhs_64_zext_multi_use
|
||||
; CHECK: liveins: $x0, $x1, $x2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
|
||||
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
|
||||
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
|
||||
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
|
||||
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
|
||||
; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext
|
||||
; CHECK-NEXT: $x0 = COPY %or
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%cmp_lhs:gpr(s64) = COPY $x0
|
||||
%cmp_rhs:gpr(s64) = COPY $x1
|
||||
%add_rhs:gpr(s64) = COPY $x2
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
|
||||
%cmp_ext:gpr(s64) = G_ZEXT %cmp
|
||||
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
|
||||
%or:gpr(s64) = G_OR %add, %cmp_ext
|
||||
$x0 = COPY %or(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: cmp_add_rhs_64_cmp_multi_use
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1, $x2
|
||||
|
||||
; The cmp is used more than once so don't fold.
|
||||
; CHECK-LABEL: name: cmp_add_rhs_64_cmp_multi_use
|
||||
; CHECK: liveins: $x0, $x1, $x2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: %cmp_lhs:gpr64 = COPY $x0
|
||||
; CHECK-NEXT: %cmp_rhs:gpr64 = COPY $x1
|
||||
; CHECK-NEXT: %add_rhs:gpr64 = COPY $x2
|
||||
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
|
||||
; CHECK-NEXT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
|
||||
; CHECK-NEXT: %cmp_ext:gpr64 = SUBREG_TO_REG 0, %cmp, %subreg.sub_32
|
||||
; CHECK-NEXT: %add:gpr64 = ADDXrr %cmp_ext, %add_rhs
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], %cmp, %subreg.sub_32
|
||||
; CHECK-NEXT: %cmp_ext2:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 31
|
||||
; CHECK-NEXT: %or:gpr64 = ORRXrr %add, %cmp_ext2
|
||||
; CHECK-NEXT: $x0 = COPY %or
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%cmp_lhs:gpr(s64) = COPY $x0
|
||||
%cmp_rhs:gpr(s64) = COPY $x1
|
||||
%add_rhs:gpr(s64) = COPY $x2
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
|
||||
%cmp_ext:gpr(s64) = G_ZEXT %cmp
|
||||
%add:gpr(s64) = G_ADD %cmp_ext, %add_rhs
|
||||
%cmp_ext2:gpr(s64) = G_SEXT %cmp
|
||||
%or:gpr(s64) = G_OR %add, %cmp_ext2
|
||||
$x0 = COPY %or(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
|
Loading…
Reference in New Issue