forked from OSchip/llvm-project
Revert "[SLP] General improvements of SLP vectorization process."
This reverts commit r310255. llvm-svn: 310257
This commit is contained in:
parent 3d3bde7682
commit 53d523c9eb
@@ -100,19 +100,6 @@ private:
slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI);

/// Try to vectorize trees that start at insertvalue instructions.
bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
slpvectorizer::BoUpSLP &R);
/// Try to vectorize trees that start at insertelement instructions.
bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
slpvectorizer::BoUpSLP &R);
/// Try to vectorize trees that start at compare instructions.
bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R);
/// Tries to vectorize constructs started from CmpInst, InsertValueInst or
/// InsertElementInst instructions.
bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
BasicBlock *BB, slpvectorizer::BoUpSLP &R);

/// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
@@ -4387,7 +4387,7 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!I)
return false;

if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
if (!isa<BinaryOperator>(I))
return false;

Value *P = I->getParent();
@@ -4925,30 +4925,39 @@ private:
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
/// starting from the last insertelement instruction.
///
/// Returns true if it matches
///
static bool findBuildVector(InsertElementInst *LastInsertElem,
static bool findBuildVector(InsertElementInst *FirstInsertElem,
SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V = nullptr;
do {
BuildVector.push_back(LastInsertElem);
BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
V = LastInsertElem->getOperand(0);
if (isa<UndefValue>(V))
break;
LastInsertElem = dyn_cast<InsertElementInst>(V);
if (!LastInsertElem || !LastInsertElem->hasOneUse())
if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
return false;

InsertElementInst *IE = FirstInsertElem;
while (true) {
BuildVector.push_back(IE);
BuildVectorOpds.push_back(IE->getOperand(1));

if (IE->use_empty())
return false;
} while (true);
std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;

InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back());
if (!NextUse)
return true;

// If this isn't the final use, make sure the next insertelement is the only
// use. It's OK if the final constructed vector is used multiple times
if (!IE->hasOneUse())
return false;

IE = NextUse;
}

return false;
}

/// \brief Like findBuildVector, but looks for construction of aggregate.
/// \brief Like findBuildVector, but looks backwards for construction of aggregate.
///
/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,
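As a reading aid for this hunk, here is a minimal, hypothetical LLVM IR sketch (value names %v0..%v3 are illustrative and not from this commit) of the build-vector idiom both variants of findBuildVector walk: the restored version starts at the first insertelement into an undef vector and follows the single-use chain forward, while the reverted version started at the last insertelement and walked operand 0 backwards.

%v0 = insertelement <4 x float> undef, float %s0, i32 0
%v1 = insertelement <4 x float> %v0, float %s1, i32 1
%v2 = insertelement <4 x float> %v1, float %s2, i32 2
%v3 = insertelement <4 x float> %v2, float %s3, i32 3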
@@ -5133,64 +5142,6 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
ExtraVectorization);
}

bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R) {
const DataLayout &DL = BB->getModule()->getDataLayout();
if (!R.canMapToVector(IVI->getType(), DL))
return false;

SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
return false;

DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false);
}

bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
return false;

// Vectorize starting with the build vector operands ignoring the BuildVector
// instructions for the purpose of scheduling and user extraction.
return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
}

bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
BoUpSLP &R) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R))
return true;

bool OpsChanged = false;
for (int Idx = 0; Idx < 2; ++Idx) {
OpsChanged |=
vectorizeRootInstruction(nullptr, CI->getOperand(Idx), BB, R, TTI);
}
return OpsChanged;
}

bool SLPVectorizerPass::vectorizeSimpleInstructions(
SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
bool OpsChanged = false;
for (auto &VH : reverse(Instructions)) {
auto *I = dyn_cast_or_null<Instruction>(VH);
if (!I)
continue;
if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
else if (auto *CI = dyn_cast<CmpInst>(I))
OpsChanged |= vectorizeCmpInst(CI, BB, R);
}
Instructions.clear();
return OpsChanged;
}

bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
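For orientation, a small hypothetical IR sketch (names are illustrative, not from this commit) of the insertvalue chain that findBuildAggregate recognizes and that the deleted vectorizeInsertValueInst handed to tryToVectorizeList; in the restored code this pattern is only reached through a store, as shown in the hunks below.

%agg0 = insertvalue { float, float } undef, float %x, 0
%agg1 = insertvalue { float, float } %agg0, float %y, 1
store { float, float } %agg1, { float, float }* %p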
@@ -5250,21 +5201,10 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {

VisitedInstrs.clear();

SmallVector<WeakVH, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
vectorizeSimpleInstructions(PostProcessInstructions, BB, R)) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
Changed = true;
it = BB->begin();
e = BB->end();
}
if (!VisitedInstrs.insert(&*it).second)
continue;
}

if (isa<DbgInfoIntrinsic>(it))
continue;
@@ -5286,40 +5226,97 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}

// Ran into an instruction without users, like terminator, or function call
// with ignored return value, store. Ignore unused instructions (basing on
// instruction type, except for CallInst and InvokeInst).
if (it->use_empty() && (it->getType()->isVoidTy() || isa<CallInst>(it) ||
isa<InvokeInst>(it))) {
KeyNodes.insert(&*it);
bool OpsChanged = false;
if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(it)) {
for (auto *V : it->operand_values()) {
// Try to match and vectorize a horizontal reduction.
OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
if (ShouldStartVectorizeHorAtStore) {
if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
}
// Start vectorization of post-process list of instructions from the
// top-tree instructions to try to vectorize as many instructions as
// possible.
OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R);
if (OpsChanged) {
}

// Try to vectorize horizontal reductions feeding into a return.
if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
if (RI->getNumOperands() != 0) {
// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
}
}

// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
Changed = true;
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}

for (int I = 0; I < 2; ++I) {
if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
Changed = true;
// We would like to start over since some instructions are deleted
// and the iterator may become invalid value.
it = BB->begin();
e = BB->end();
break;
}
}
continue;
}

if (isa<InsertElementInst>(it) || isa<CmpInst>(it) ||
isa<InsertValueInst>(it))
PostProcessInstructions.push_back(&*it);
// Try to vectorize trees that start at insertelement instructions.
if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;

// Vectorize starting with the build vector operands ignoring the
// BuildVector instructions for the purpose of scheduling and user
// extraction.
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
Changed = true;
it = BB->begin();
e = BB->end();
}

continue;
}

// Try to vectorize trees that start at insertvalue instructions feeding into
// a store.
if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
if (InsertValueInst *LastInsertValue = dyn_cast<InsertValueInst>(SI->getValueOperand())) {
const DataLayout &DL = BB->getModule()->getDataLayout();
if (R.canMapToVector(SI->getValueOperand()->getType(), DL)) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(LastInsertValue, BuildVector, BuildVectorOpds))
continue;

DEBUG(dbgs() << "SLP: store of array mappable to vector: " << *SI << "\n");
if (tryToVectorizeList(BuildVectorOpds, R, BuildVector, false)) {
Changed = true;
it = BB->begin();
e = BB->end();
}
continue;
}
}
}
}
assert(PostProcessInstructions.empty() &&
"Not all instruction were processed.");

return Changed;
}
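The PR28330/PR32038 and i32_red_* test updates below exercise horizontal reductions. As a purely illustrative, hypothetical sketch (not taken from this commit), the shape of a scalar reduction tree feeding a return that vectorizeRootInstruction is given a chance to match looks like this; whether it is actually vectorized still depends on the cost model.

define i32 @sum4(i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
  %s0 = add i32 %a, %b
  %s1 = add i32 %s0, %c
  %s2 = add i32 %s1, %d
  ret i32 %s2
}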
@@ -31,8 +31,10 @@ define void @PR28330(i32 %n) {
;
; GATHER-LABEL: @PR28330(
; GATHER-NEXT: entry:
; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; GATHER-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; GATHER-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
@@ -48,11 +50,10 @@ define void @PR28330(i32 %n) {
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]]
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP19]]
; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
@@ -64,16 +65,16 @@ define void @PR28330(i32 %n) {
; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]]
; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1
; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], [[TMP17]]
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
@@ -179,8 +180,10 @@ define void @PR32038(i32 %n) {
;
; GATHER-LABEL: @PR32038(
; GATHER-NEXT: entry:
; GATHER-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; GATHER-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
; GATHER-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; GATHER-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
; GATHER-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
; GATHER-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; GATHER-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; GATHER-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
@@ -196,11 +199,10 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]]
; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]]
; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]]
; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
@@ -212,27 +214,29 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5
; GATHER-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; GATHER-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP21]], i32 1
; GATHER-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP23]], i32 2
; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP25]], i32 3
; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP27]], i32 4
; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP29]], i32 5
; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP31]], i32 6
; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP33]], i32 7
; GATHER-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP7]])
; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5
; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR32038(
; MAX-COST-NEXT: entry:
; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
; MAX-COST-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
; MAX-COST-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0
; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0
; MAX-COST-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
@@ -241,16 +245,14 @@ define void @PR32038(i32 %n) {
; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; MAX-COST-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> undef, i1 [[TMP1]], i32 0
; MAX-COST-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> [[TMP0]], i1 [[TMP3]], i32 1
; MAX-COST-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[TMP5]], i32 2
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[TMP7]], i32 3
; MAX-COST-NEXT: br label [[FOR_BODY:%.*]]
; MAX-COST: for.body:
; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0
; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1
; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2
; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3
; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 -720, i32 -720, i32 -720, i32 -720>, <4 x i32> <i32 -80, i32 -80, i32 -80, i32 -80>
; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef
; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef
@@ -258,10 +260,10 @@ define void @PR32038(i32 %n) {
; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]])
; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]]
; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]]
; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5
; MAX-COST-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP4]])
; MAX-COST-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP27]]
; MAX-COST-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP29]]
; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP7]], -5
; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]]
@@ -817,22 +817,22 @@ declare i32 @foobar(i32)
define void @i32_red_call(i32 %val) {
; CHECK-LABEL: @i32_red_call(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[ADD_6]])
; CHECK-NEXT: ret void
;
entry:
@@ -858,22 +858,22 @@ entry:
define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {
; CHECK-LABEL: @i32_red_invoke(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[ADD_6]])
; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
; CHECK: exception:
; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8
@@ -303,30 +303,24 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_no_users(