[GlobalISel] Combine G_UMULH x, (1 << c) -> x >> (bitwidth - c)
In order not to generate an unnecessary G_CTLZ, I extended the constant folder in the CSEMIRBuilder to handle G_CTLZ, and added some extra handling of vector constants. It seems we don't yet support constant folding of vector constants in general, so the tests still show some other useless G_SUB instructions.

Differential Revision: https://reviews.llvm.org/D111036
This commit is contained in:
parent 23800b05be
commit 08b3c0d995
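As a quick standalone sanity check of the identity this combine relies on (an illustration, not part of the patch; it uses the Clang/GCC __uint128_t extension): the high half of x * 2^c equals x >> (bitwidth - c) for every in-range power of two. Note that c == 0 (a multiplier of 1) would need a full-bit-width shift, which is why the match below bails out on isOne() and why the out-of-range-shift tests keep their G_UMULH.

#include <cassert>
#include <cstdint>

// High half of a 64x64 -> 128-bit unsigned multiply, i.e. what G_UMULH
// computes for s64 operands.
static uint64_t umulh64(uint64_t A, uint64_t B) {
  return static_cast<uint64_t>((static_cast<__uint128_t>(A) * B) >> 64);
}

int main() {
  const uint64_t X = 0x0123456789ABCDEFULL;
  // umulh(x, 2^c) == x >> (64 - c) for every in-range power of two.
  for (unsigned C = 1; C < 64; ++C)
    assert(umulh64(X, uint64_t(1) << C) == X >> (64 - C));
  return 0;
}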
@@ -610,6 +610,10 @@ public:
  bool matchUDivByConst(MachineInstr &MI);
  void applyUDivByConst(MachineInstr &MI);

  // G_UMULH x, (1 << c) -> x >> (bitwidth - c)
  bool matchUMulHToLShr(MachineInstr &MI);
  void applyUMulHToLShr(MachineInstr &MI);

  /// Try to transform \p MI by using all of the above
  /// combine functions. Returns true if changed.
  bool tryCombine(MachineInstr &MI);
@@ -272,6 +272,11 @@ Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
                                         Register Src,
                                         const MachineRegisterInfo &MRI);

/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
/// then it tries to do an element-wise constant fold.
Optional<SmallVector<unsigned>>
ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);

/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
/// set.
@@ -724,6 +724,14 @@ def mulo_by_2: GICombineRule<
         [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;

def mulh_to_lshr : GICombineRule<
  (defs root:$root),
  (match (wip_match_opcode G_UMULH):$root,
         [{ return Helper.matchUMulHToLShr(*${root}); }]),
  (apply [{ Helper.applyUMulHToLShr(*${root}); }])>;

def mulh_combines : GICombineGroup<[mulh_to_lshr]>;

// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                     undef_to_negative_one,
@@ -771,7 +779,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
    shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
    truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
    form_bitfield_extract, constant_fold, fabs_fneg_fold,
    intdiv_combines]>;
    intdiv_combines, mulh_combines]>;

// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
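The TableGen wiring above follows the file's existing pattern: wip_match_opcode G_UMULH nominates candidate instructions, the C++ predicate matchUMulHToLShr decides whether the rewrite applies, and applyUMulHToLShr performs it. Adding mulh_combines to all_combines is what actually enables the rule in the generic combiner pipelines.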
@@ -13,6 +13,7 @@

#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;
@@ -213,6 +214,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
      return buildFConstant(DstOps[0], *Cst);
    break;
  }
  case TargetOpcode::G_CTLZ: {
    assert(SrcOps.size() == 1 && "Expected one source");
    assert(DstOps.size() == 1 && "Expected one dest");
    auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
    if (!MaybeCsts)
      break;
    if (MaybeCsts->size() == 1)
      return buildConstant(DstOps[0], (*MaybeCsts)[0]);
    // This was a vector constant. Build a G_BUILD_VECTOR for them.
    SmallVector<Register> ConstantRegs;
    LLT VecTy = DstOps[0].getLLTTy(*getMRI());
    for (unsigned Cst : *MaybeCsts)
      ConstantRegs.emplace_back(
          buildConstant(VecTy.getScalarType(), Cst).getReg(0));
    return buildBuildVector(DstOps[0], ConstantRegs);
  }
  }
  bool CanCopy = checkCopyToDefsPossible(DstOps);
  if (!canPerformCSEForOpc(Opc))
@@ -69,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  return I;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}

/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
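A minimal standalone check of the LogBase2 identity buildLogBase2 uses (a sketch, not part of the patch; __builtin_clz is a GCC/Clang builtin, well defined here because V is never zero):

#include <cassert>
#include <cstdint>

int main() {
  // For each 32-bit power of two V = 2^Log, (EltBits - 1) - ctlz(V)
  // recovers Log, matching LogBase2(V) = (EltBits - 1) - ctlz(V).
  for (unsigned Log = 0; Log < 32; ++Log) {
    uint32_t V = 1u << Log;
    assert(31u - unsigned(__builtin_clz(V)) == Log);
  }
  return 0;
}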
@@ -4579,6 +4589,36 @@ void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
  replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
}

bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UMULH);
  Register RHS = MI.getOperand(2).getReg();
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  auto CstVal = isConstantOrConstantSplatVector(*MRI.getVRegDef(RHS), MRI);
  if (!CstVal || CstVal->isOne() || !isPowerOf2_64(CstVal->getZExtValue()))
    return false;
  return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
}

void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  unsigned NumEltBits = Ty.getScalarSizeInBits();

  Builder.setInstrAndDebugLoc(MI);
  auto LogBase2 = buildLogBase2(RHS, Builder);
  auto ShiftAmt =
      Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
  auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
  Builder.buildLShr(Dst, LHS, Trunc);
  MI.eraseFromParent();
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
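Worked through on the scalar MIR test below: for G_UMULH %0, 8 on s64, buildLogBase2 folds to 63 - ctlz(8) = 3, so the shift amount is 64 - 3 = 61, which is exactly the G_CONSTANT i64 61 in the expected output.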
@@ -726,6 +726,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
  return None;
}

Optional<SmallVector<unsigned>>
llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
  LLT Ty = MRI.getType(Src);
  SmallVector<unsigned> FoldedCTLZs;
  auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
    auto MaybeCst = getIConstantVRegVal(R, MRI);
    if (!MaybeCst)
      return None;
    return MaybeCst->countLeadingZeros();
  };
  if (Ty.isVector()) {
    // Try to constant fold each element.
    auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
    if (!BV)
      return None;
    for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
      if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
        FoldedCTLZs.emplace_back(*MaybeFold);
        continue;
      }
      return None;
    }
    return FoldedCTLZs;
  }
  if (auto MaybeCst = tryFoldScalar(Src)) {
    FoldedCTLZs.emplace_back(*MaybeCst);
    return FoldedCTLZs;
  }
  return None;
}

bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
                                  GISelKnownBits *KB) {
  Optional<DefinitionAndSourceRegister> DefSrcReg =
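The element-wise fold can be mirrored in plain C++ (an illustration, not the LLVM API; __builtin_clz is a GCC/Clang builtin). On the <4 x s32> constant {8, 16, 32, 64} used in the unit test at the end of this patch it yields {28, 27, 26, 25}:

#include <cassert>
#include <cstdint>

int main() {
  // Element-wise ctlz over the build-vector constant from the unit test.
  const uint32_t Elts[4] = {8, 16, 32, 64};
  const unsigned Expected[4] = {28, 27, 26, 25};
  for (int I = 0; I < 4; ++I)
    assert(unsigned(__builtin_clz(Elts[I])) == Expected[I]);
  return 0;
}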
@@ -22,14 +22,16 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
; GISEL-NEXT: adrp x8, .LCPI0_0
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
; GISEL-NEXT: umull2 v3.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
; GISEL-NEXT: umull2 v3.4s, v0.8h, v2.8h
; GISEL-NEXT: umull v0.4s, v0.4h, v2.4h
; GISEL-NEXT: uzp2 v0.8h, v0.8h, v3.8h
; GISEL-NEXT: add v0.8h, v0.8h, v1.8h
; GISEL-NEXT: adrp x8, .LCPI0_2
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
; GISEL-NEXT: sub v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v1.8h, v1.8h
; GISEL-NEXT: umull2 v2.4s, v0.8h, v3.8h
; GISEL-NEXT: umull v3.4s, v0.4h, v3.4h
; GISEL-NEXT: uzp2 v2.8h, v3.8h, v2.8h
; GISEL-NEXT: sub v0.8h, v0.8h, v2.8h
; GISEL-NEXT: ushl v0.8h, v0.8h, v1.8h
; GISEL-NEXT: add v0.8h, v0.8h, v2.8h
; GISEL-NEXT: ushr v0.8h, v0.8h, #4
; GISEL-NEXT: ret
  %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
@@ -155,28 +157,30 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
;
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI3_5
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_5]
; GISEL-NEXT: adrp x8, .LCPI3_4
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_4]
; GISEL-NEXT: adrp x8, .LCPI3_3
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_4]
; GISEL-NEXT: adrp x8, .LCPI3_2
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
; GISEL-NEXT: adrp x8, .LCPI3_1
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1]
; GISEL-NEXT: adrp x8, .LCPI3_3
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_3]
; GISEL-NEXT: adrp x8, .LCPI3_0
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI3_0]
; GISEL-NEXT: sub v3.8h, v4.8h, v3.8h
; GISEL-NEXT: umull2 v4.4s, v0.8h, v2.8h
; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
; GISEL-NEXT: neg v4.8h, v4.8h
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
; GISEL-NEXT: neg v3.8h, v3.8h
; GISEL-NEXT: sub v4.8h, v0.8h, v2.8h
; GISEL-NEXT: cmeq v1.8h, v1.8h, v6.8h
; GISEL-NEXT: ushl v3.8h, v4.8h, v3.8h
; GISEL-NEXT: neg v5.8h, v5.8h
; GISEL-NEXT: shl v1.8h, v1.8h, #15
; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
; GISEL-NEXT: ushl v2.8h, v2.8h, v5.8h
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
; GISEL-NEXT: ret
@@ -34,17 +34,20 @@ body: |
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
    ; CHECK-NEXT: $q0 = COPY [[LSHR]](<8 x s16>)
    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 16
    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
    ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(<8 x s16>) = G_SUB [[BUILD_VECTOR3]], [[BUILD_VECTOR2]]
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[SUB]], [[SUB1]](<8 x s16>)
    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LSHR]], [[UMULH]]
    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR1]](<8 x s16>)
    ; CHECK-NEXT: $q0 = COPY [[LSHR1]](<8 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<8 x s16>) = COPY $q0
    %2:_(s16) = G_CONSTANT i16 23
@@ -208,30 +211,33 @@ body: |
    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C16]](s16), [[C18]](s16), [[C19]](s16)
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16)
    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C17]](s16), [[C17]](s16), [[C20]](s16)
    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
    ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
    ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
    ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
    ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
    ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
    ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
    ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
    ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
    ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
    ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C12]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16)
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C11]](s16), [[C11]](s16), [[C11]](s16), [[C11]](s16), [[C16]](s16), [[C16]](s16), [[C19]](s16)
    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
    ; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
    ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16)
    ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 16
    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16)
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
    ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(<8 x s16>) = G_SUB [[BUILD_VECTOR4]], [[BUILD_VECTOR3]]
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[SUB]], [[SUB1]](<8 x s16>)
    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LSHR]], [[UMULH]]
    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
    ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16)
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR5]]
    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
    ; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(<8 x s16>) = COPY $q0
@@ -0,0 +1,101 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s

---
name: mul_to_lshr
alignment: 4
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: mul_to_lshr
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 61
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[C]](s64)
    ; CHECK-NEXT: $x0 = COPY [[LSHR]](s64)
    %0:_(s64) = COPY $x0
    %1:_(s64) = G_CONSTANT i64 8
    %2:_(s64) = G_UMULH %0, %1(s64)
    $x0 = COPY %2(s64)
...
---
name: mul_to_lshr_vector
alignment: 4
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $q0
    ; CHECK-LABEL: name: mul_to_lshr_vector
    ; CHECK: liveins: $q0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s32>) = G_SUB [[BUILD_VECTOR1]], [[BUILD_VECTOR]]
    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
    ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(<4 x s32>) = G_SUB [[BUILD_VECTOR2]], [[SUB]]
    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[SUB1]](<4 x s32>)
    ; CHECK-NEXT: $q0 = COPY [[LSHR]](<4 x s32>)
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 8
    %bv:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %1, %1
    %2:_(<4 x s32>) = G_UMULH %0, %bv(<4 x s32>)
    $q0 = COPY %2(<4 x s32>)
...
---
name: mul_to_lshr_vector_out_of_range_shift
alignment: 4
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $q0
    ; CHECK-LABEL: name: mul_to_lshr_vector_out_of_range_shift
    ; CHECK: liveins: $q0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<4 x s32>) = G_UMULH [[COPY]], %bv
    ; CHECK-NEXT: $q0 = COPY [[UMULH]](<4 x s32>)
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 1
    %bv:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %1, %1
    %2:_(<4 x s32>) = G_UMULH %0, %bv(<4 x s32>)
    $q0 = COPY %2(<4 x s32>)
...
---
name: mul_to_lshr_out_of_range_shift
alignment: 4
tracksRegLiveness: true
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $x0
    ; CHECK-LABEL: name: mul_to_lshr_out_of_range_shift
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[C]]
    ; CHECK-NEXT: $x0 = COPY [[UMULH]](s64)
    %0:_(s64) = COPY $x0
    %1:_(s64) = G_CONSTANT i64 1
    %2:_(s64) = G_UMULH %0, %1(s64)
    $x0 = COPY %2(s64)
...
@@ -222,8 +222,7 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_udiv_i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, 0x100000
; CHECK-NEXT: v_mul_hi_u32 v0, v0, v1
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 12, v0
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = udiv i32 %num, 4096
  ret i32 %result
@@ -233,9 +232,8 @@ define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
; CHECK-LABEL: v_udiv_v2i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x100000
; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 12, v0
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 12, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = udiv <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
@@ -257,38 +255,21 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
}

define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_mov_b32 s4, 0xb2a50881
; GISEL-NEXT: s_brev_b32 s5, 1
; GISEL-NEXT: v_mul_hi_u32 v2, v0, s4
; GISEL-NEXT: v_mul_hi_u32 v3, v1, s4
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_mul_hi_u32 v0, v0, s5
; GISEL-NEXT: v_mul_hi_u32 v1, v1, s5
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v0
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i32_oddk_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_mov_b32 s4, 0xb2a50881
; CGP-NEXT: v_mul_hi_u32 v2, v0, s4
; CGP-NEXT: v_mul_hi_u32 v3, v1, s4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; CGP-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT: v_lshrrev_b32_e32 v0, 20, v0
; CGP-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: v_udiv_v2i32_oddk_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0xb2a50881
; CHECK-NEXT: v_mul_hi_u32 v2, v0, s4
; CHECK-NEXT: v_mul_hi_u32 v3, v1, s4
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 20, v0
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}
@@ -969,78 +969,28 @@ define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
; CHECK-LABEL: v_udiv_i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x100000
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v0
; CHECK-NEXT: v_mul_hi_u32 v3, v0, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v1
; CHECK-NEXT: v_mul_hi_u32 v5, v1, 0
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v4, v0
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
; CHECK-NEXT: s_setpc_b64 s[30:31]
  %result = udiv i64 %num, 4096
  ret i64 %result
}

define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CHECK-LABEL: v_udiv_v2i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s4, 0x100000
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 20, v0
; CHECK-NEXT: v_mul_hi_u32 v5, v0, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v6, 20, v1
; CHECK-NEXT: v_mul_hi_u32 v7, v1, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 20, v2
; CHECK-NEXT: v_mul_hi_u32 v9, v2, 0
; CHECK-NEXT: v_lshlrev_b32_e32 v10, 20, v3
; CHECK-NEXT: v_mul_hi_u32 v11, v3, 0
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
; CHECK-NEXT: v_add_i32_e32 v8, vcc, 0, v8
; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; CHECK-NEXT: v_mul_hi_u32 v2, v2, s4
; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; CHECK-NEXT: v_mul_hi_u32 v3, v3, s4
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v6, v0
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v9
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v13, v6
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v8
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v6
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
; CHECK-NEXT: s_setpc_b64 s[30:31]
; GISEL-LABEL: v_udiv_v2i64_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_sub_u32 s4, 63, 11
; GISEL-NEXT: s_sub_u32 s4, 64, s4
; GISEL-NEXT: v_lshr_b64 v[0:1], v[0:1], s4
; GISEL-NEXT: v_lshr_b64 v[2:3], v[2:3], s4
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_udiv_v2i64_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 12
; CGP-NEXT: s_setpc_b64 s[30:31]
  %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
  ret <2 x i64> %result
}
@@ -163,4 +163,46 @@ TEST_F(AArch64GISelMITest, TestCSEImmediateNextCSE) {
  EXPECT_TRUE(CSEB.getInsertPt() == CSEB.getMBB().end());
}

TEST_F(AArch64GISelMITest, TestConstantFoldCTL) {
  setUp();
  if (!TM)
    return;

  LLT s32 = LLT::scalar(32);

  GISelCSEInfo CSEInfo;
  CSEInfo.setCSEConfig(std::make_unique<CSEConfigConstantOnly>());
  CSEInfo.analyze(*MF);
  B.setCSEInfo(&CSEInfo);
  CSEMIRBuilder CSEB(B.getState());
  auto Cst8 = CSEB.buildConstant(s32, 8);
  auto *CtlzDef = &*CSEB.buildCTLZ(s32, Cst8);
  EXPECT_TRUE(CtlzDef->getOpcode() == TargetOpcode::G_CONSTANT);
  EXPECT_TRUE(CtlzDef->getOperand(1).getCImm()->getZExtValue() == 28);

  // Test vector.
  auto Cst16 = CSEB.buildConstant(s32, 16);
  auto Cst32 = CSEB.buildConstant(s32, 32);
  auto Cst64 = CSEB.buildConstant(s32, 64);
  LLT VecTy = LLT::fixed_vector(4, s32);
  auto BV = CSEB.buildBuildVector(VecTy, {Cst8.getReg(0), Cst16.getReg(0),
                                          Cst32.getReg(0), Cst64.getReg(0)});
  CSEB.buildCTLZ(VecTy, BV);

  auto CheckStr = R"(
  ; CHECK: [[CST8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK: [[CST28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
  ; CHECK: [[CST16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK: [[CST32:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK: [[CST64:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
  ; CHECK: [[BV1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST8]]:_(s32), [[CST16]]:_(s32), [[CST32]]:_(s32), [[CST64]]:_(s32)
  ; CHECK: [[CST27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
  ; CHECK: [[CST26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
  ; CHECK: [[CST25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
  ; CHECK: [[BV2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST28]]:_(s32), [[CST27]]:_(s32), [[CST26]]:_(s32), [[CST25]]:_(s32)
)";

  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}

} // namespace