forked from OSchip/llvm-project
[GlobalISel] Port the udiv -> mul by constant combine.
This is a straight port from the equivalent DAG combine. Differential Revision: https://reviews.llvm.org/D110890
This commit is contained in:
parent
e356027016
commit
8bfc0e06dc
|
@ -602,6 +602,14 @@ public:
|
|||
/// feeding a G_AND instruction \p MI.
|
||||
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
|
||||
|
||||
/// Given a G_UDIV \p MI expressing a divide by constant, return an
|
||||
/// expression that implements it by multiplying by a magic number.
|
||||
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
|
||||
MachineInstr *buildUDivUsingMul(MachineInstr &MI);
|
||||
/// Combine G_UDIV by constant into a multiply by magic constant.
|
||||
bool matchUDivByConst(MachineInstr &MI);
|
||||
void applyUDivByConst(MachineInstr &MI);
|
||||
|
||||
/// Try to transform \p MI by using all of the above
|
||||
/// combine functions. Returns true if changed.
|
||||
bool tryCombine(MachineInstr &MI);
|
||||
|
|
|
@ -397,6 +397,11 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
|
|||
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Determines if \p MI defines a constant integer or a build vector of
|
||||
/// constant integers. Treats undef values as constants.
|
||||
bool isConstantOrConstantVector(MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
/// Determines if \p MI defines a constant integer or a splat vector of
|
||||
/// constant integers.
|
||||
/// \returns the scalar constant or None.
|
||||
|
|
|
@ -694,6 +694,15 @@ def bitfield_extract_from_shr : GICombineRule<
|
|||
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
|
||||
bitfield_extract_from_and,
|
||||
bitfield_extract_from_shr]>;
|
||||
|
||||
def udiv_by_const : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_UDIV):$root,
|
||||
[{ return Helper.matchUDivByConst(*${root}); }]),
|
||||
(apply [{ Helper.applyUDivByConst(*${root}); }])>;
|
||||
|
||||
def intdiv_combines : GICombineGroup<[udiv_by_const]>;
|
||||
|
||||
def reassoc_ptradd : GICombineRule<
|
||||
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
||||
(match (wip_match_opcode G_PTR_ADD):$root,
|
||||
|
@ -761,7 +770,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
|
|||
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
|
||||
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
|
||||
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
|
||||
form_bitfield_extract, constant_fold, fabs_fneg_fold]>;
|
||||
form_bitfield_extract, constant_fold, fabs_fneg_fold,
|
||||
intdiv_combines]>;
|
||||
|
||||
// A combine group used for prelegalizer combiners at -O0. The combines in
|
||||
// this group have been selected based on experiments to balance code size and
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/DivisionByConstantInfo.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include <tuple>
|
||||
|
||||
|
@ -4422,6 +4423,162 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Build the replacement sequence for a G_UDIV whose divisor is a constant
/// (scalar G_CONSTANT or G_BUILD_VECTOR of constants): an optional pre-shift,
/// a G_UMULH by a per-element magic constant, an optional "NPQ" fix-up
/// (LHS - Q, halved, added back to Q), a post-shift, and finally a select
/// that yields the untouched LHS for elements whose divisor is exactly 1.
///
/// \param MI the G_UDIV to expand. New instructions are inserted at \p MI;
///           \p MI itself is neither erased nor rewritten here.
/// \returns the final G_SELECT instruction producing the quotient.
///
/// Callers are expected to have validated the divisor first (see
/// matchUDivByConst): every element must be a non-zero ConstantInt, otherwise
/// the cast<> / assertion below will fire.
MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  auto &UDiv = cast<GenericMachineInstr>(MI);
  Register Dst = UDiv.getReg(0);
  Register LHS = UDiv.getReg(1);
  Register RHS = UDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ScalarTy = Ty.getScalarType();
  const unsigned EltBits = ScalarTy.getScalarSizeInBits();
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  auto &MIB = Builder;
  MIB.setInstrAndDebugLoc(MI);

  // Set as soon as any element needs the NPQ fix-up path.
  bool UseNPQ = false;
  // Per-element constants, one entry per divisor element, in element order.
  SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Computes the magic number and shift amounts for one divisor element and
  // materialises them as scalar G_CONSTANTs.
  auto BuildUDIVPattern = [&](const Constant *C) {
    auto *CI = cast<ConstantInt>(C);
    const APInt &Divisor = CI->getValue();
    UnsignedDivisonByConstantInfo magics =
        UnsignedDivisonByConstantInfo::get(Divisor);
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.IsAdd != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics =
          UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
      assert(magics.IsAdd == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.Magic;

    // Whether this element takes the NPQ fix-up path. This is a plain flag,
    // so keep it a bool like UseNPQ.
    bool SelNPQ;
    if (magics.IsAdd == 0 || Divisor.isOneValue()) {
      assert(magics.ShiftAmount < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.ShiftAmount;
      SelNPQ = false;
    } else {
      // One bit of the shift is already performed by the NPQ halving step.
      PostShift = magics.ShiftAmount - 1;
      SelNPQ = true;
    }

    PreShifts.push_back(
        MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
    MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
    // For NPQ elements the factor has only the top bit set; otherwise zero.
    NPQFactors.push_back(
        MIB.buildConstant(ScalarTy,
                          SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                                 : APInt::getZero(EltBits))
            .getReg(0));
    PostShifts.push_back(
        MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
    UseNPQ |= SelNPQ;
    return true;
  };

  // Collect the shifts/magic values from each element.
  bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
  (void)Matched;
  assert(Matched && "Expected unary predicate match to succeed");

  Register PreShift, PostShift, MagicFactor, NPQFactor;
  auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
  if (RHSDef) {
    // Vector divisor: bundle the per-element constants into vectors.
    PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
    MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
    NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
    PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
  } else {
    assert(MRI.getType(RHS).isScalar() &&
           "Non-build_vector operation should have been a scalar");
    // Scalar divisor: NPQFactor stays unset because the scalar NPQ path below
    // shifts by one instead of multiplying.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  Register Q = LHS;
  Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);

  // Multiply the (pre-shifted) numerator, i.e. the G_UDIV's LHS, by the magic
  // value using G_UMULH.
  Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);

  if (UseNPQ) {
    Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (Ty.isVector())
      NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
    else
      NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);

    Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
  }

  Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);

  // Elements divided by exactly 1 take the original LHS instead of Q.
  auto One = MIB.buildConstant(Ty, 1);
  auto IsOne = MIB.buildICmp(
      CmpInst::Predicate::ICMP_EQ,
      Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
  return MIB.buildSelect(Ty, IsOne, LHS, Q);
}
|
||||
|
||||
/// Decide whether the G_UDIV \p MI should be rewritten as a multiply by a
/// magic constant.
///
/// \returns true only if the divisor is a constant (or build vector of
/// constants) whose elements are all non-zero ConstantInts, the target does
/// not report integer division as cheap, the function is not minsize, and,
/// when legalizer info is available, the required G_MUL / G_UMULH / G_ICMP
/// forms pass isLegalOrBeforeLegalizer.
bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  Register ResultReg = MI.getOperand(0).getReg();
  Register DivisorReg = MI.getOperand(2).getReg();
  LLT ResultTy = MRI.getType(ResultReg);

  // Only constant divisors are candidates.
  if (!isConstantOrConstantVector(*MRI.getVRegDef(DivisorReg), MRI))
    return false;

  auto &MF = *MI.getMF();
  const auto &Fn = MF.getFunction();

  // Leave the division alone when the target says it is cheap for this type.
  const bool DivIsCheap = getTargetLowering().isIntDivCheap(
      getApproximateEVTForLLT(ResultTy, MF.getDataLayout(), Fn.getContext()),
      Fn.getAttributes());
  if (DivIsCheap)
    return false;

  // Don't do this for minsize because the instruction sequence is usually
  // larger.
  if (Fn.hasMinSize())
    return false;

  // Don't do this if the types are not going to be legal.
  if (LI) {
    LLT CmpTy =
        ResultTy.isVector() ? ResultTy.changeElementSize(1) : LLT::scalar(1);
    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_MUL, {ResultTy, ResultTy}}) ||
        !isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {ResultTy}}) ||
        !isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {CmpTy, ResultTy}}))
      return false;
  }

  // Every element must be a known, non-zero integer constant; a null
  // constant or a non-ConstantInt is rejected.
  auto IsNonZeroInt = [&](const Constant *Elt) {
    auto *IntElt = dyn_cast_or_null<ConstantInt>(Elt);
    return IntElt && !IntElt->isZero();
  };
  return matchUnaryPredicate(MRI, DivisorReg, IsNonZeroInt);
}
|
||||
|
||||
/// Apply step of the G_UDIV-by-constant combine: build the multiply-based
/// sequence for \p MI and replace \p MI's single def with the register
/// defined by the instruction that buildUDivUsingMul returns.
void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
  MachineInstr *Replacement = buildUDivUsingMul(MI);
  Register NewResult = Replacement->getOperand(0).getReg();
  replaceSingleDefInstWithReg(MI, NewResult);
}
|
||||
|
||||
bool CombinerHelper::tryCombine(MachineInstr &MI) {
|
||||
if (tryCombineCopy(MI))
|
||||
return true;
|
||||
|
|
|
@ -1016,6 +1016,23 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
|
|||
return RegOrConstant(Reg);
|
||||
}
|
||||
|
||||
/// Returns true if \p MI's first operand resolves to an integer constant via
/// getIConstantVRegValWithLookThrough, or if \p MI is a G_BUILD_VECTOR whose
/// sources are each either such a constant or a G_IMPLICIT_DEF.
bool llvm::isConstantOrConstantVector(MachineInstr &MI,
                                      const MachineRegisterInfo &MRI) {
  // Scalar case: the value defined by MI is itself an integer constant.
  if (getIConstantVRegValWithLookThrough(MI.getOperand(0).getReg(), MRI))
    return true;

  auto *BuildVec = dyn_cast<GBuildVector>(&MI);
  if (!BuildVec)
    return false;

  // Vector case: reject on the first source that is neither constant nor
  // an implicit def.
  for (unsigned Idx = 0; Idx < BuildVec->getNumSources(); ++Idx) {
    Register Src = BuildVec->getSourceReg(Idx);
    const bool IsConstOrUndef =
        getIConstantVRegValWithLookThrough(Src, MRI) ||
        getOpcodeDef<GImplicitDef>(Src, MRI);
    if (!IsConstOrUndef)
      return false;
  }
  return true;
}
|
||||
|
||||
Optional<APInt>
|
||||
llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
|
|
|
@ -0,0 +1,287 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=SDAG
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=GISEL
|
||||
|
||||
; These tests are taken from the combine-udiv.ll in X86.
|
||||
define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
|
||||
; SDAG-LABEL: combine_vec_udiv_uniform:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: mov w8, #25645
|
||||
; SDAG-NEXT: dup v1.8h, w8
|
||||
; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
|
||||
; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
|
||||
; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: usra v1.8h, v0.8h, #1
|
||||
; SDAG-NEXT: ushr v0.8h, v1.8h, #4
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: combine_vec_udiv_uniform:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI0_1
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
|
||||
; GISEL-NEXT: adrp x8, .LCPI0_0
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
|
||||
; GISEL-NEXT: umull2 v3.4s, v0.8h, v1.8h
|
||||
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
|
||||
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
|
||||
; GISEL-NEXT: sub v0.8h, v0.8h, v1.8h
|
||||
; GISEL-NEXT: umull2 v3.4s, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull v0.4s, v0.4h, v2.4h
|
||||
; GISEL-NEXT: uzp2 v0.8h, v0.8h, v3.8h
|
||||
; GISEL-NEXT: add v0.8h, v0.8h, v1.8h
|
||||
; GISEL-NEXT: ushr v0.8h, v0.8h, #4
|
||||
; GISEL-NEXT: ret
|
||||
%1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
|
||||
; SDAG-LABEL: combine_vec_udiv_nonuniform:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: adrp x8, .LCPI1_0
|
||||
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
|
||||
; SDAG-NEXT: adrp x8, .LCPI1_1
|
||||
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
|
||||
; SDAG-NEXT: adrp x8, .LCPI1_2
|
||||
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
|
||||
; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull2 v4.4s, v1.8h, v2.8h
|
||||
; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
|
||||
; SDAG-NEXT: adrp x8, .LCPI1_3
|
||||
; SDAG-NEXT: uzp2 v1.8h, v1.8h, v4.8h
|
||||
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
|
||||
; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull2 v4.4s, v0.8h, v3.8h
|
||||
; SDAG-NEXT: umull v0.4s, v0.4h, v3.4h
|
||||
; SDAG-NEXT: uzp2 v0.8h, v0.8h, v4.8h
|
||||
; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: combine_vec_udiv_nonuniform:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_5
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_5]
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_4
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_4]
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_3
|
||||
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_3]
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_1
|
||||
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_0
|
||||
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
|
||||
; GISEL-NEXT: adrp x8, .LCPI1_2
|
||||
; GISEL-NEXT: neg v2.8h, v2.8h
|
||||
; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI1_2]
|
||||
; GISEL-NEXT: ushl v2.8h, v0.8h, v2.8h
|
||||
; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
|
||||
; GISEL-NEXT: umull2 v5.4s, v2.8h, v3.8h
|
||||
; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
|
||||
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v5.8h
|
||||
; GISEL-NEXT: sub v3.8h, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull2 v5.4s, v3.8h, v6.8h
|
||||
; GISEL-NEXT: umull v3.4s, v3.4h, v6.4h
|
||||
; GISEL-NEXT: uzp2 v3.8h, v3.8h, v5.8h
|
||||
; GISEL-NEXT: neg v4.8h, v4.8h
|
||||
; GISEL-NEXT: shl v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
|
||||
; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
|
||||
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
|
||||
; GISEL-NEXT: ret
|
||||
%1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
|
||||
; SDAG-LABEL: combine_vec_udiv_nonuniform2:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: adrp x8, .LCPI2_0
|
||||
; SDAG-NEXT: adrp x9, .LCPI2_1
|
||||
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
|
||||
; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
|
||||
; SDAG-NEXT: adrp x8, .LCPI2_2
|
||||
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI2_2]
|
||||
; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h
|
||||
; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
|
||||
; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: ushl v0.8h, v0.8h, v3.8h
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: combine_vec_udiv_nonuniform2:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI2_4
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_4]
|
||||
; GISEL-NEXT: adrp x8, .LCPI2_3
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_3]
|
||||
; GISEL-NEXT: adrp x8, .LCPI2_1
|
||||
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_1]
|
||||
; GISEL-NEXT: adrp x8, .LCPI2_0
|
||||
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI2_0]
|
||||
; GISEL-NEXT: adrp x8, .LCPI2_2
|
||||
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_2]
|
||||
; GISEL-NEXT: neg v2.8h, v2.8h
|
||||
; GISEL-NEXT: ushl v2.8h, v0.8h, v2.8h
|
||||
; GISEL-NEXT: cmeq v1.8h, v1.8h, v4.8h
|
||||
; GISEL-NEXT: umull2 v4.4s, v2.8h, v5.8h
|
||||
; GISEL-NEXT: umull v2.4s, v2.4h, v5.4h
|
||||
; GISEL-NEXT: neg v3.8h, v3.8h
|
||||
; GISEL-NEXT: shl v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
|
||||
; GISEL-NEXT: ushl v2.8h, v2.8h, v3.8h
|
||||
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
|
||||
; GISEL-NEXT: ret
|
||||
%1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
|
||||
; SDAG-LABEL: combine_vec_udiv_nonuniform3:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: adrp x8, .LCPI3_0
|
||||
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
|
||||
; SDAG-NEXT: adrp x8, .LCPI3_1
|
||||
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
|
||||
; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
|
||||
; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
|
||||
; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: usra v1.8h, v0.8h, #1
|
||||
; SDAG-NEXT: ushl v0.8h, v1.8h, v3.8h
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI3_4
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_4]
|
||||
; GISEL-NEXT: adrp x8, .LCPI3_3
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
|
||||
; GISEL-NEXT: adrp x8, .LCPI3_2
|
||||
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
|
||||
; GISEL-NEXT: adrp x8, .LCPI3_1
|
||||
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1]
|
||||
; GISEL-NEXT: adrp x8, .LCPI3_0
|
||||
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
|
||||
; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
|
||||
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
|
||||
; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
|
||||
; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
|
||||
; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
|
||||
; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
|
||||
; GISEL-NEXT: neg v4.8h, v4.8h
|
||||
; GISEL-NEXT: shl v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
|
||||
; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
|
||||
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
|
||||
; GISEL-NEXT: ret
|
||||
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
|
||||
; SDAG-LABEL: combine_vec_udiv_nonuniform4:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: adrp x8, .LCPI4_0
|
||||
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
|
||||
; SDAG-NEXT: adrp x8, .LCPI4_1
|
||||
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
|
||||
; SDAG-NEXT: adrp x8, .LCPI4_2
|
||||
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
|
||||
; SDAG-NEXT: adrp x8, .LCPI4_3
|
||||
; SDAG-NEXT: ldr q4, [x8, :lo12:.LCPI4_3]
|
||||
; SDAG-NEXT: umull2 v5.8h, v0.16b, v1.16b
|
||||
; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
|
||||
; SDAG-NEXT: uzp2 v1.16b, v1.16b, v5.16b
|
||||
; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b
|
||||
; SDAG-NEXT: and v1.16b, v1.16b, v3.16b
|
||||
; SDAG-NEXT: and v0.16b, v0.16b, v4.16b
|
||||
; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI4_3
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_3]
|
||||
; GISEL-NEXT: adrp x8, .LCPI4_0
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
|
||||
; GISEL-NEXT: adrp x8, .LCPI4_2
|
||||
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
|
||||
; GISEL-NEXT: adrp x8, .LCPI4_1
|
||||
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_1]
|
||||
; GISEL-NEXT: cmeq v1.16b, v1.16b, v2.16b
|
||||
; GISEL-NEXT: umull2 v2.8h, v0.16b, v3.16b
|
||||
; GISEL-NEXT: umull v3.8h, v0.8b, v3.8b
|
||||
; GISEL-NEXT: neg v4.16b, v4.16b
|
||||
; GISEL-NEXT: uzp2 v2.16b, v3.16b, v2.16b
|
||||
; GISEL-NEXT: shl v1.16b, v1.16b, #7
|
||||
; GISEL-NEXT: ushl v2.16b, v2.16b, v4.16b
|
||||
; GISEL-NEXT: sshr v1.16b, v1.16b, #7
|
||||
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
|
||||
; GISEL-NEXT: ret
|
||||
%div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
ret <16 x i8> %div
|
||||
}
|
||||
|
||||
define <8 x i16> @pr38477(<8 x i16> %a0) {
|
||||
; SDAG-LABEL: pr38477:
|
||||
; SDAG: // %bb.0:
|
||||
; SDAG-NEXT: adrp x8, .LCPI5_0
|
||||
; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
|
||||
; SDAG-NEXT: adrp x8, .LCPI5_1
|
||||
; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
|
||||
; SDAG-NEXT: adrp x8, .LCPI5_2
|
||||
; SDAG-NEXT: umull2 v4.4s, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
|
||||
; SDAG-NEXT: uzp2 v1.8h, v1.8h, v4.8h
|
||||
; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
|
||||
; SDAG-NEXT: adrp x8, .LCPI5_3
|
||||
; SDAG-NEXT: sub v4.8h, v0.8h, v1.8h
|
||||
; SDAG-NEXT: umull2 v5.4s, v4.8h, v2.8h
|
||||
; SDAG-NEXT: umull v2.4s, v4.4h, v2.4h
|
||||
; SDAG-NEXT: ldr q4, [x8, :lo12:.LCPI5_3]
|
||||
; SDAG-NEXT: adrp x8, .LCPI5_4
|
||||
; SDAG-NEXT: uzp2 v2.8h, v2.8h, v5.8h
|
||||
; SDAG-NEXT: ldr q5, [x8, :lo12:.LCPI5_4]
|
||||
; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
|
||||
; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h
|
||||
; SDAG-NEXT: and v1.16b, v1.16b, v4.16b
|
||||
; SDAG-NEXT: and v0.16b, v0.16b, v5.16b
|
||||
; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: pr38477:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: adrp x8, .LCPI5_4
|
||||
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_4]
|
||||
; GISEL-NEXT: adrp x8, .LCPI5_3
|
||||
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
|
||||
; GISEL-NEXT: adrp x8, .LCPI5_2
|
||||
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
|
||||
; GISEL-NEXT: adrp x8, .LCPI5_1
|
||||
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
|
||||
; GISEL-NEXT: adrp x8, .LCPI5_0
|
||||
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
|
||||
; GISEL-NEXT: umull2 v6.4s, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull v2.4s, v0.4h, v2.4h
|
||||
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v6.8h
|
||||
; GISEL-NEXT: cmeq v1.8h, v1.8h, v5.8h
|
||||
; GISEL-NEXT: sub v5.8h, v0.8h, v2.8h
|
||||
; GISEL-NEXT: umull2 v6.4s, v5.8h, v3.8h
|
||||
; GISEL-NEXT: umull v3.4s, v5.4h, v3.4h
|
||||
; GISEL-NEXT: uzp2 v3.8h, v3.8h, v6.8h
|
||||
; GISEL-NEXT: neg v4.8h, v4.8h
|
||||
; GISEL-NEXT: shl v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: add v2.8h, v3.8h, v2.8h
|
||||
; GISEL-NEXT: ushl v2.8h, v2.8h, v4.8h
|
||||
; GISEL-NEXT: sshr v1.8h, v1.8h, #15
|
||||
; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b
|
||||
; GISEL-NEXT: ret
|
||||
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
|
||||
ret <8 x i16> %1
|
||||
}
|
|
@ -0,0 +1,353 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
|
||||
---
|
||||
name: udiv_by_scalar_const
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: udiv_by_scalar_const
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 818089009
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LSHR]], [[C1]]
|
||||
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UMULH]], [[C2]](s32)
|
||||
; CHECK-NEXT: $w0 = COPY [[LSHR1]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%cst:_(s32) = G_CONSTANT i32 42
|
||||
%2:_(s32) = G_UDIV %0(s32), %cst(s32)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: combine_vec_udiv_uniform
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: combine_vec_udiv_uniform
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR]]
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
|
||||
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR1]]
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
|
||||
; CHECK-NEXT: $q0 = COPY [[LSHR]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%2:_(s16) = G_CONSTANT i16 23
|
||||
%1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16), %2(s16)
|
||||
%3:_(<8 x s16>) = G_UDIV %0, %1
|
||||
$q0 = COPY %3(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: combine_vec_udiv_nonuniform
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: combine_vec_udiv_nonuniform
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 34
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -23
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 56
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 128
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 -256
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
|
||||
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3855
|
||||
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
|
||||
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 8195
|
||||
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
|
||||
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
|
||||
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
|
||||
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 512
|
||||
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32767
|
||||
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
|
||||
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32639
|
||||
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C15]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16), [[C17]](s16), [[C18]](s16), [[C20]](s16), [[C21]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C14]](s16), [[C8]](s16), [[C8]](s16), [[C19]](s16), [[C19]](s16), [[C8]](s16)
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
|
||||
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR3]]
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
|
||||
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR4]](<8 x s16>)
|
||||
; CHECK-NEXT: [[C22:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
|
||||
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16), [[C22]](s16)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR5]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
|
||||
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%2:_(s16) = G_CONSTANT i16 23
|
||||
%3:_(s16) = G_CONSTANT i16 34
|
||||
%4:_(s16) = G_CONSTANT i16 -23
|
||||
%5:_(s16) = G_CONSTANT i16 56
|
||||
%6:_(s16) = G_CONSTANT i16 128
|
||||
%7:_(s16) = G_CONSTANT i16 -1
|
||||
%8:_(s16) = G_CONSTANT i16 -256
|
||||
%9:_(s16) = G_CONSTANT i16 -32768
|
||||
%1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
|
||||
%10:_(<8 x s16>) = G_UDIV %0, %1
|
||||
$q0 = COPY %10(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: combine_vec_udiv_nonuniform2
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: combine_vec_udiv_nonuniform2
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -34
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 35
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 36
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -37
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 38
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -39
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 40
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 -41
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 16393
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
|
||||
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 13
|
||||
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -5617
|
||||
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
|
||||
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 -7281
|
||||
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32749
|
||||
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
|
||||
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -10347
|
||||
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 8197
|
||||
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13107
|
||||
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32747
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16), [[C10]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C12]](s16), [[C14]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16), [[C20]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C11]](s16), [[C13]](s16), [[C13]](s16), [[C16]](s16), [[C13]](s16), [[C11]](s16), [[C13]](s16), [[C16]](s16)
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[COPY]], [[BUILD_VECTOR1]](<8 x s16>)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[LSHR]], [[BUILD_VECTOR2]]
|
||||
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[UMULH]], [[BUILD_VECTOR3]](<8 x s16>)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
|
||||
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%2:_(s16) = G_CONSTANT i16 -34
|
||||
%3:_(s16) = G_CONSTANT i16 35
|
||||
%4:_(s16) = G_CONSTANT i16 36
|
||||
%5:_(s16) = G_CONSTANT i16 -37
|
||||
%6:_(s16) = G_CONSTANT i16 38
|
||||
%7:_(s16) = G_CONSTANT i16 -39
|
||||
%8:_(s16) = G_CONSTANT i16 40
|
||||
%9:_(s16) = G_CONSTANT i16 -41
|
||||
%1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
|
||||
%10:_(<8 x s16>) = G_UDIV %0, %1
|
||||
$q0 = COPY %10(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: combine_vec_udiv_nonuniform3
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: combine_vec_udiv_nonuniform3
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 7
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 23
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 25
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 27
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 47
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 63
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 127
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 9363
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
|
||||
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 25645
|
||||
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
|
||||
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 18351
|
||||
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12137
|
||||
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
|
||||
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 23705
|
||||
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
|
||||
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 1041
|
||||
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 517
|
||||
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C13]](s16), [[C14]](s16), [[C15]](s16), [[C16]](s16), [[C18]](s16), [[C19]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16), [[C9]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C10]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C12]](s16), [[C17]](s16), [[C17]](s16), [[C20]](s16)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
|
||||
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
|
||||
; CHECK-NEXT: [[C21:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
|
||||
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16), [[C21]](s16)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
|
||||
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%2:_(s16) = G_CONSTANT i16 7
|
||||
%3:_(s16) = G_CONSTANT i16 23
|
||||
%4:_(s16) = G_CONSTANT i16 25
|
||||
%5:_(s16) = G_CONSTANT i16 27
|
||||
%6:_(s16) = G_CONSTANT i16 31
|
||||
%7:_(s16) = G_CONSTANT i16 47
|
||||
%8:_(s16) = G_CONSTANT i16 63
|
||||
%9:_(s16) = G_CONSTANT i16 127
|
||||
%1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
|
||||
%10:_(<8 x s16>) = G_UDIV %0, %1
|
||||
$q0 = COPY %10(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: combine_vec_udiv_nonuniform4
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: combine_vec_udiv_nonuniform4
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -64
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 -85
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C4]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<16 x s8>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<16 x s8>) = G_LSHR [[UMULH]], [[BUILD_VECTOR2]](<16 x s8>)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<16 x s8>), [[BUILD_VECTOR3]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<16 x s8>) = G_SELECT [[ICMP]](<16 x s1>), [[COPY]], [[LSHR]]
|
||||
; CHECK-NEXT: $q0 = COPY [[SELECT]](<16 x s8>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<16 x s8>) = COPY $q0
|
||||
%2:_(s8) = G_CONSTANT i8 -64
|
||||
%3:_(s8) = G_CONSTANT i8 1
|
||||
%1:_(<16 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
|
||||
%4:_(<16 x s8>) = G_UDIV %0, %1
|
||||
$q0 = COPY %4(<16 x s8>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
||||
---
|
||||
name: pr38477
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$q0' }
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0
|
||||
|
||||
; CHECK-LABEL: name: pr38477
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 119
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 73
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -111
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 118
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 31
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16), [[C4]](s16), [[C5]](s16), [[C6]](s16), [[C7]](s16)
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s16) = G_CONSTANT i16 4957
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768
|
||||
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s16) = G_CONSTANT i16 6
|
||||
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s16) = G_CONSTANT i16 -8079
|
||||
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s16) = G_CONSTANT i16 4103
|
||||
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s16) = G_CONSTANT i16 12
|
||||
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 16385
|
||||
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
|
||||
; CHECK-NEXT: [[C17:%[0-9]+]]:_(s16) = G_CONSTANT i16 -29991
|
||||
; CHECK-NEXT: [[C18:%[0-9]+]]:_(s16) = G_CONSTANT i16 2048
|
||||
; CHECK-NEXT: [[C19:%[0-9]+]]:_(s16) = G_CONSTANT i16 2115
|
||||
; CHECK-NEXT: [[C20:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
|
||||
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C9]](s16), [[C12]](s16), [[C13]](s16), [[C15]](s16), [[C17]](s16), [[C18]](s16), [[C19]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C10]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C8]](s16), [[C10]](s16)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C8]](s16), [[C11]](s16), [[C11]](s16), [[C14]](s16), [[C16]](s16), [[C11]](s16), [[C8]](s16), [[C20]](s16)
|
||||
; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[COPY]], [[BUILD_VECTOR1]]
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[COPY]], [[UMULH]]
|
||||
; CHECK-NEXT: [[UMULH1:%[0-9]+]]:_(<8 x s16>) = G_UMULH [[SUB]], [[BUILD_VECTOR2]]
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[UMULH1]], [[UMULH]]
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR3]](<8 x s16>)
|
||||
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR]]
|
||||
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
%0:_(<8 x s16>) = COPY $q0
|
||||
%2:_(s16) = G_CONSTANT i16 1
|
||||
%3:_(s16) = G_CONSTANT i16 119
|
||||
%4:_(s16) = G_CONSTANT i16 73
|
||||
%5:_(s16) = G_CONSTANT i16 -111
|
||||
%6:_(s16) = G_CONSTANT i16 -3
|
||||
%7:_(s16) = G_CONSTANT i16 118
|
||||
%8:_(s16) = G_CONSTANT i16 32
|
||||
%9:_(s16) = G_CONSTANT i16 31
|
||||
%1:_(<8 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16), %4(s16), %5(s16), %6(s16), %7(s16), %8(s16), %9(s16)
|
||||
%10:_(<8 x s16>) = G_UDIV %0, %1
|
||||
$q0 = COPY %10(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
|
||||
...
|
|
@ -222,117 +222,21 @@ define i32 @v_udiv_i32_pow2k_denom(i32 %num) {
|
|||
; CHECK-LABEL: v_udiv_i32_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_movk_i32 s6, 0x1000
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x45800000
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000
|
||||
; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
|
||||
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
|
||||
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0x100000
|
||||
; CHECK-NEXT: v_mul_hi_u32 v0, v0, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv i32 %num, 4096
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) {
|
||||
; GISEL-LABEL: v_udiv_v2i32_pow2k_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: s_movk_i32 s4, 0x1000
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, 0x1000
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, 0xfffff000
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
|
||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
|
||||
; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
|
||||
; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v5
|
||||
; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v5, v3
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
|
||||
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v5, 12, v4
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v3
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v3
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
|
||||
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
|
||||
; GISEL-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
|
||||
; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v3
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_udiv_v2i32_pow2k_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: s_movk_i32 s8, 0x1000
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
|
||||
; CGP-NEXT: s_movk_i32 s4, 0xf000
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0xfffff000
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0x1000
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v5, 0x45800000
|
||||
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
||||
; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
|
||||
; CGP-NEXT: v_mul_lo_u32 v6, s4, v2
|
||||
; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
|
||||
; CGP-NEXT: v_mul_hi_u32 v6, v2, v6
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v5, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
|
||||
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
|
||||
; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v2
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
|
||||
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
|
||||
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
|
||||
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
|
||||
; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
|
||||
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3
|
||||
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-LABEL: v_udiv_v2i32_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0x100000
|
||||
; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4
|
||||
; CHECK-NEXT: v_mul_hi_u32 v1, v1, s4
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv <2 x i32> %num, <i32 4096, i32 4096>
|
||||
ret <2 x i32> %result
|
||||
}
|
||||
|
@ -341,25 +245,12 @@ define i32 @v_udiv_i32_oddk_denom(i32 %num) {
|
|||
; CHECK-LABEL: v_udiv_i32_oddk_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
|
||||
; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
|
||||
; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
|
||||
; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0xb2a50881
|
||||
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
|
||||
; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1
|
||||
; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 20, v0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv i32 %num, 1235195
|
||||
ret i32 %result
|
||||
|
@ -369,87 +260,34 @@ define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) {
|
|||
; GISEL-LABEL: v_udiv_v2i32_oddk_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb
|
||||
; GISEL-NEXT: v_mov_b32_e32 v2, 0x12d8fb
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, 0xffed2705
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
|
||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
|
||||
; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
|
||||
; GISEL-NEXT: v_mul_lo_u32 v6, v3, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v3, v3, v5
|
||||
; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v5, v3
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
|
||||
; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
|
||||
; GISEL-NEXT: v_mul_lo_u32 v5, v4, s8
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v7, v3, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v3
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
|
||||
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
|
||||
; GISEL-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5]
|
||||
; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v3
|
||||
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
|
||||
; GISEL-NEXT: s_mov_b32 s4, 0xb2a50881
|
||||
; GISEL-NEXT: s_brev_b32 s5, 1
|
||||
; GISEL-NEXT: v_mul_hi_u32 v2, v0, s4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v1, s4
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
|
||||
; GISEL-NEXT: v_mul_hi_u32 v0, v0, s5
|
||||
; GISEL-NEXT: v_mul_hi_u32 v1, v1, s5
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v0
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 20, v1
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_udiv_v2i32_oddk_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
|
||||
; CGP-NEXT: s_mov_b32 s4, 0xffed2705
|
||||
; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, 0x4996c7d8
|
||||
; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
||||
; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
|
||||
; CGP-NEXT: v_mul_lo_u32 v5, s4, v2
|
||||
; CGP-NEXT: v_mul_lo_u32 v6, s4, v4
|
||||
; CGP-NEXT: v_mul_hi_u32 v5, v2, v5
|
||||
; CGP-NEXT: v_mul_hi_u32 v6, v4, v6
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
|
||||
; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
|
||||
; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
|
||||
; CGP-NEXT: v_mul_lo_u32 v5, v2, s8
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
|
||||
; CGP-NEXT: v_mul_lo_u32 v7, v4, s8
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
|
||||
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
|
||||
; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s8, v0
|
||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
|
||||
; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4
|
||||
; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v5, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc
|
||||
; CGP-NEXT: s_mov_b32 s4, 0xb2a50881
|
||||
; CGP-NEXT: v_mul_hi_u32 v2, v0, s4
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v1, s4
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v1, 1, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v0, 20, v0
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v1, 20, v1
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = udiv <2 x i32> %num, <i32 1235195, i32 1235195>
|
||||
ret <2 x i32> %result
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue