forked from OSchip/llvm-project
Revert "[SLP]Alternate vectorization for cmp instructions."
This reverts commit afaaecc88c.
It crashes when compiling SciPy; test case: https://reviews.llvm.org/P8276
This commit is contained in:
parent
762f0b5463
commit
5281f0dab2
|
@ -471,36 +471,17 @@ static bool isValidForAlternation(unsigned Opcode) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
|
||||
unsigned BaseIndex = 0);
|
||||
|
||||
/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
|
||||
/// compatible instructions or constants, or just some other regular values.
///
/// \p BaseOp0 / \p BaseOp1 are the operands of the reference cmp, \p Op0 /
/// \p Op1 the operands of the cmp being tested. Callers pass \p Op1, \p Op0
/// (swapped) to test the commuted form.
///
/// \returns true if ANY of the following holds (the clauses are OR'ed, so a
/// match in a single operand position is sufficient):
///   - BaseOp0 and Op0 both satisfy isConstant();
///   - BaseOp1 and Op1 both satisfy isConstant();
///   - none of the four operands is an Instruction;
///   - getSameOpcode() yields a non-zero opcode for {BaseOp0, Op0};
///   - getSameOpcode() yields a non-zero opcode for {BaseOp1, Op1}.
|
||||
static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
|
||||
Value *Op1) {
|
||||
// First-position operands are both constants.
return (isConstant(BaseOp0) && isConstant(Op0)) ||
|
||||
// Second-position operands are both constants.
(isConstant(BaseOp1) && isConstant(Op1)) ||
|
||||
// No operand is an Instruction (e.g. arguments or other plain values).
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
|
||||
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
|
||||
// The pair in one position shares a (possibly alternate) opcode.
// NOTE(review): presumably getOpcode() is 0 when getSameOpcode() finds no
// common opcode - confirm against InstructionsState.
getSameOpcode({BaseOp0, Op0}).getOpcode() ||
|
||||
getSameOpcode({BaseOp1, Op1}).getOpcode();
|
||||
}
|
||||
|
||||
/// \returns analysis of the Instructions in \p VL described in
|
||||
/// InstructionsState, the Opcode that we suppose the whole list
|
||||
/// could be vectorized even if its structure is diverse.
|
||||
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
|
||||
unsigned BaseIndex) {
|
||||
unsigned BaseIndex = 0) {
|
||||
// Make sure these are all Instructions.
|
||||
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
|
||||
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
|
||||
|
||||
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
|
||||
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
|
||||
bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
|
||||
CmpInst::Predicate BasePred =
|
||||
IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
|
||||
: CmpInst::BAD_ICMP_PREDICATE;
|
||||
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
|
||||
unsigned AltOpcode = Opcode;
|
||||
unsigned AltIndex = BaseIndex;
|
||||
|
@ -533,47 +514,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
|
|||
continue;
|
||||
}
|
||||
}
|
||||
} else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
|
||||
auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
|
||||
auto *Inst = cast<Instruction>(VL[Cnt]);
|
||||
Type *Ty0 = BaseInst->getOperand(0)->getType();
|
||||
Type *Ty1 = Inst->getOperand(0)->getType();
|
||||
if (Ty0 == Ty1) {
|
||||
Value *BaseOp0 = BaseInst->getOperand(0);
|
||||
Value *BaseOp1 = BaseInst->getOperand(1);
|
||||
Value *Op0 = Inst->getOperand(0);
|
||||
Value *Op1 = Inst->getOperand(1);
|
||||
CmpInst::Predicate CurrentPred =
|
||||
cast<CmpInst>(VL[Cnt])->getPredicate();
|
||||
// Check for compatible operands. If the corresponding operands are not
|
||||
// compatible - need to perform alternate vectorization.
|
||||
if (InstOpcode == Opcode) {
|
||||
if (BasePred == CurrentPred &&
|
||||
areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
|
||||
continue;
|
||||
if (BasePred == CmpInst::getSwappedPredicate(CurrentPred) &&
|
||||
areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
|
||||
continue;
|
||||
auto *AltInst = cast<CmpInst>(VL[AltIndex]);
|
||||
CmpInst::Predicate AltPred = AltInst->getPredicate();
|
||||
Value *AltOp0 = AltInst->getOperand(0);
|
||||
Value *AltOp1 = AltInst->getOperand(1);
|
||||
// Check if operands are compatible with alternate operands.
|
||||
if (AltPred == CurrentPred &&
|
||||
areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
|
||||
continue;
|
||||
if (AltPred == CmpInst::getSwappedPredicate(CurrentPred) &&
|
||||
areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
|
||||
continue;
|
||||
}
|
||||
if (BaseIndex == AltIndex) {
|
||||
assert(isValidForAlternation(Opcode) &&
|
||||
isValidForAlternation(InstOpcode) &&
|
||||
"Cast isn't safe for alternation, logic needs to be updated!");
|
||||
AltIndex = Cnt;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
|
||||
continue;
|
||||
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
|
||||
|
@ -4414,41 +4354,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
|||
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
|
||||
|
||||
// Reorder operands if reordering would enable vectorization.
|
||||
auto *CI = dyn_cast<CmpInst>(VL0);
|
||||
if (isa<BinaryOperator>(VL0) || CI) {
|
||||
if (isa<BinaryOperator>(VL0)) {
|
||||
ValueList Left, Right;
|
||||
if (!CI || all_of(VL, [](Value *V) {
|
||||
return cast<CmpInst>(V)->isCommutative();
|
||||
})) {
|
||||
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
|
||||
} else {
|
||||
CmpInst::Predicate P0 = CI->getPredicate();
|
||||
CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
|
||||
CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
|
||||
Value *BaseOp0 = VL0->getOperand(0);
|
||||
Value *BaseOp1 = VL0->getOperand(1);
|
||||
// Collect operands - commute if it uses the swapped predicate or
|
||||
// alternate operation.
|
||||
for (Value *V : VL) {
|
||||
auto *Cmp = cast<CmpInst>(V);
|
||||
Value *LHS = Cmp->getOperand(0);
|
||||
Value *RHS = Cmp->getOperand(1);
|
||||
CmpInst::Predicate CurrentPred = CI->getPredicate();
|
||||
CmpInst::Predicate CurrentPredSwapped =
|
||||
CmpInst::getSwappedPredicate(CurrentPred);
|
||||
if (P0 == AltP0 || P0 == AltP0Swapped) {
|
||||
if ((P0 == CurrentPred &&
|
||||
!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
|
||||
(P0 == CurrentPredSwapped &&
|
||||
!areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
|
||||
std::swap(LHS, RHS);
|
||||
} else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
|
||||
std::swap(LHS, RHS);
|
||||
}
|
||||
Left.push_back(LHS);
|
||||
Right.push_back(RHS);
|
||||
}
|
||||
}
|
||||
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
|
||||
TE->setOperand(0, Left);
|
||||
TE->setOperand(1, Right);
|
||||
buildTree_rec(Left, Depth + 1, {TE, 0});
|
||||
|
@ -5380,8 +5288,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
|||
((Instruction::isBinaryOp(E->getOpcode()) &&
|
||||
Instruction::isBinaryOp(E->getAltOpcode())) ||
|
||||
(Instruction::isCast(E->getOpcode()) &&
|
||||
Instruction::isCast(E->getAltOpcode())) ||
|
||||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
|
||||
Instruction::isCast(E->getAltOpcode()))) &&
|
||||
"Invalid Shuffle Vector Operand");
|
||||
InstructionCost ScalarCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
|
@ -5429,14 +5336,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
|||
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
|
||||
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
|
||||
CostKind);
|
||||
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
|
||||
VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
|
||||
Builder.getInt1Ty(),
|
||||
CI0->getPredicate(), CostKind, VL0);
|
||||
VecCost += TTI->getCmpSelInstrCost(
|
||||
E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
|
||||
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
|
||||
E->getAltOp());
|
||||
} else {
|
||||
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
|
||||
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
|
||||
|
@ -5453,28 +5352,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
|||
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
|
||||
[E](Instruction *I) {
|
||||
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
|
||||
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
|
||||
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
|
||||
auto *CI = cast<CmpInst>(I);
|
||||
CmpInst::Predicate P0 = CI0->getPredicate();
|
||||
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
|
||||
CmpInst::Predicate AltP0Swapped =
|
||||
CmpInst::getSwappedPredicate(AltP0);
|
||||
CmpInst::Predicate CurrentPred = CI->getPredicate();
|
||||
CmpInst::Predicate CurrentPredSwapped =
|
||||
CmpInst::getSwappedPredicate(CurrentPred);
|
||||
if (P0 == AltP0 || P0 == AltP0Swapped) {
|
||||
unsigned Idx =
|
||||
std::distance(E->Scalars.begin(), find(E->Scalars, I));
|
||||
// Alternate cmps have same/swapped predicate as main cmps but
|
||||
// different order of compatible operands.
|
||||
ArrayRef<Value *> VLOp0 = E->getOperand(0);
|
||||
return (P0 == CurrentPred && CI->getOperand(0) != VLOp0[Idx]) ||
|
||||
(P0 == CurrentPredSwapped &&
|
||||
CI->getOperand(1) != VLOp0[Idx]);
|
||||
}
|
||||
return CurrentPred != P0 && CurrentPredSwapped != P0;
|
||||
}
|
||||
return I->getOpcode() == E->getAltOpcode();
|
||||
},
|
||||
Mask);
|
||||
|
@ -6957,12 +6834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
((Instruction::isBinaryOp(E->getOpcode()) &&
|
||||
Instruction::isBinaryOp(E->getAltOpcode())) ||
|
||||
(Instruction::isCast(E->getOpcode()) &&
|
||||
Instruction::isCast(E->getAltOpcode())) ||
|
||||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
|
||||
Instruction::isCast(E->getAltOpcode()))) &&
|
||||
"Invalid Shuffle Vector Operand");
|
||||
|
||||
Value *LHS = nullptr, *RHS = nullptr;
|
||||
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
|
||||
if (Instruction::isBinaryOp(E->getOpcode())) {
|
||||
setInsertPointAfterBundle(E);
|
||||
LHS = vectorizeTree(E->getOperand(0));
|
||||
RHS = vectorizeTree(E->getOperand(1));
|
||||
|
@ -6982,15 +6858,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
|
||||
V1 = Builder.CreateBinOp(
|
||||
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
|
||||
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
|
||||
V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
|
||||
auto *AltCI = cast<CmpInst>(E->getAltOp());
|
||||
CmpInst::Predicate AltPred = AltCI->getPredicate();
|
||||
unsigned AltIdx =
|
||||
std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
|
||||
if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
|
||||
AltPred = CmpInst::getSwappedPredicate(AltPred);
|
||||
V1 = Builder.CreateCmp(AltPred, LHS, RHS);
|
||||
} else {
|
||||
V0 = Builder.CreateCast(
|
||||
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
|
||||
|
@ -7015,28 +6882,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
|
||||
[E](Instruction *I) {
|
||||
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
|
||||
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
|
||||
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
|
||||
auto *CI = cast<CmpInst>(I);
|
||||
CmpInst::Predicate P0 = CI0->getPredicate();
|
||||
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
|
||||
CmpInst::Predicate AltP0Swapped =
|
||||
CmpInst::getSwappedPredicate(AltP0);
|
||||
CmpInst::Predicate CurrentPred = CI->getPredicate();
|
||||
CmpInst::Predicate CurrentPredSwapped =
|
||||
CmpInst::getSwappedPredicate(CurrentPred);
|
||||
if (P0 == AltP0 || P0 == AltP0Swapped) {
|
||||
unsigned Idx =
|
||||
std::distance(E->Scalars.begin(), find(E->Scalars, I));
|
||||
// Alternate cmps have same/swapped predicate as main cmps but
|
||||
// different order of compatible operands.
|
||||
ArrayRef<Value *> VLOp0 = E->getOperand(0);
|
||||
return (P0 == CurrentPred && CI->getOperand(0) != VLOp0[Idx]) ||
|
||||
(P0 == CurrentPredSwapped &&
|
||||
CI->getOperand(1) != VLOp0[Idx]);
|
||||
}
|
||||
return CurrentPred != P0 && CurrentPredSwapped != P0;
|
||||
}
|
||||
return I->getOpcode() == E->getAltOpcode();
|
||||
},
|
||||
Mask, &OpScalars, &AltScalars);
|
||||
|
|
|
@ -90,17 +90,24 @@ return:
|
|||
define float @test_merge_anyof_v4sf(<4 x float> %t) {
|
||||
; CHECK-LABEL: @test_merge_anyof_v4sf(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[T:%.*]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i64 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i64 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i64 0
|
||||
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]]
|
||||
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
|
||||
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
|
||||
; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
|
||||
; CHECK-NEXT: ret float [[RETVAL_0]]
|
||||
;
|
||||
entry:
|
||||
|
@ -413,18 +420,24 @@ return:
|
|||
define float @test_merge_anyof_v4si(<4 x i32> %t) {
|
||||
; CHECK-LABEL: @test_merge_anyof_v4si(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[T:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
|
||||
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP5]], i64 0
|
||||
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
|
||||
; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
|
||||
; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
|
||||
; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
|
||||
; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00
|
||||
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
|
||||
; CHECK-NEXT: ret float [[RETVAL_0]]
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
|
||||
|
||||
;
|
||||
; Check that we can commute operands based on the predicate.
|
||||
|
@ -235,14 +235,46 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
|
|||
}
|
||||
|
||||
define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
|
||||
; CHECK-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ord <4 x float> [[TMP2]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
|
||||
; CHECK-NEXT: ret <4 x i32> [[R]]
|
||||
; SSE-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; SSE-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
|
||||
; SSE-NEXT: [[TMP1:%.*]] = bitcast float* [[P1]] to <2 x float>*
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
|
||||
; SSE-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
|
||||
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 3, i32 0>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B3]], i64 0
|
||||
; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B0]], i64 1
|
||||
; SSE-NEXT: [[TMP8:%.*]] = fcmp ord <2 x float> [[TMP5]], [[TMP7]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[D0:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
|
||||
; SSE-NEXT: [[D3:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
||||
; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
|
||||
; SSE-NEXT: ret <4 x i32> [[R]]
|
||||
;
|
||||
; AVX-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; AVX-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3
|
||||
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; AVX-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
|
||||
; AVX-NEXT: [[TMP1:%.*]] = bitcast float* [[P1]] to <2 x float>*
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
|
||||
; AVX-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
|
||||
; AVX-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
|
||||
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
; AVX-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]]
|
||||
; AVX-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
|
||||
; AVX-NEXT: [[D0:%.*]] = insertelement <4 x i1> poison, i1 [[C0]], i64 0
|
||||
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
|
||||
; AVX-NEXT: [[D3:%.*]] = insertelement <4 x i1> [[D21]], i1 [[C3]], i64 3
|
||||
; AVX-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
|
||||
; AVX-NEXT: ret <4 x i32> [[R]]
|
||||
;
|
||||
%a0 = extractelement <4 x float> %a, i32 0
|
||||
%a1 = extractelement <4 x float> %a, i32 1
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE
|
||||
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
|
||||
|
||||
;
|
||||
; Check that we can commute operands based on the predicate.
|
||||
|
@ -235,14 +235,46 @@ define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
|
|||
}
|
||||
|
||||
define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
|
||||
; CHECK-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ord <4 x float> [[TMP2]], [[A:%.*]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
|
||||
; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
|
||||
; CHECK-NEXT: ret <4 x i32> [[R]]
|
||||
; SSE-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; SSE-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
|
||||
; SSE-NEXT: [[TMP1:%.*]] = bitcast float* [[P1]] to <2 x float>*
|
||||
; SSE-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
|
||||
; SSE-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
|
||||
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]]
|
||||
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 3, i32 0>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B3]], i64 0
|
||||
; SSE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B0]], i64 1
|
||||
; SSE-NEXT: [[TMP8:%.*]] = fcmp ord <2 x float> [[TMP5]], [[TMP7]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[D0:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; SSE-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
|
||||
; SSE-NEXT: [[D3:%.*]] = shufflevector <4 x i1> [[D21]], <4 x i1> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
||||
; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
|
||||
; SSE-NEXT: ret <4 x i32> [[R]]
|
||||
;
|
||||
; AVX-LABEL: @fcmp_ord_uno_v4i32(
|
||||
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
|
||||
; AVX-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i64 3
|
||||
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
||||
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
||||
; AVX-NEXT: [[B0:%.*]] = load float, float* [[B]], align 4
|
||||
; AVX-NEXT: [[TMP1:%.*]] = bitcast float* [[P1]] to <2 x float>*
|
||||
; AVX-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
|
||||
; AVX-NEXT: [[B3:%.*]] = load float, float* [[P3]], align 4
|
||||
; AVX-NEXT: [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
|
||||
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
; AVX-NEXT: [[TMP4:%.*]] = fcmp uno <2 x float> [[TMP2]], [[TMP3]]
|
||||
; AVX-NEXT: [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
|
||||
; AVX-NEXT: [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i64 0
|
||||
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP4]], <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; AVX-NEXT: [[D21:%.*]] = shufflevector <4 x i1> [[D0]], <4 x i1> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
|
||||
; AVX-NEXT: [[D3:%.*]] = insertelement <4 x i1> [[D21]], i1 [[C3]], i64 3
|
||||
; AVX-NEXT: [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
|
||||
; AVX-NEXT: ret <4 x i32> [[R]]
|
||||
;
|
||||
%a0 = extractelement <4 x float> %a, i32 0
|
||||
%a1 = extractelement <4 x float> %a, i32 1
|
||||
|
|
|
@ -913,22 +913,18 @@ define i32 @maxi8_mutiple_uses(i32) {
|
|||
; THRESH-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP6]]
|
||||
; THRESH-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP6]]
|
||||
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP4]], i32 1
|
||||
; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP3]], i32 1
|
||||
; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
|
||||
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1
|
||||
; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP4]], i32 1
|
||||
; THRESH-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i32> [[TMP12]], [[TMP14]]
|
||||
; THRESH-NEXT: [[TMP16:%.*]] = icmp slt <2 x i32> [[TMP12]], [[TMP14]]
|
||||
; THRESH-NEXT: [[TMP17:%.*]] = shufflevector <2 x i1> [[TMP15]], <2 x i1> [[TMP16]], <2 x i32> <i32 0, i32 3>
|
||||
; THRESH-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP3]], i32 1
|
||||
; THRESH-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP4]], i32 1
|
||||
; THRESH-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP17]], <2 x i32> [[TMP18]], <2 x i32> [[TMP19]]
|
||||
; THRESH-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
|
||||
; THRESH-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1
|
||||
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]]
|
||||
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[TMP22]]
|
||||
; THRESH-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1
|
||||
; THRESH-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 3, i32 4
|
||||
; THRESH-NEXT: store i32 [[TMP24]], i32* @var, align 8
|
||||
; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP12]], <2 x i32> [[TMP14]]
|
||||
; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
|
||||
; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
|
||||
; THRESH-NEXT: [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
|
||||
; THRESH-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP17]], i32 [[TMP18]]
|
||||
; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
|
||||
; THRESH-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 3, i32 4
|
||||
; THRESH-NEXT: store i32 [[TMP20]], i32* @var, align 8
|
||||
; THRESH-NEXT: ret i32 [[OP_EXTRA1]]
|
||||
;
|
||||
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
|
||||
|
|
|
@ -214,13 +214,19 @@ define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
|
|||
|
||||
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp(
|
||||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
|
||||
; CHECK-NEXT: ret i1 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]])
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP6]], i1 false
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP7]], i1 false
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP8]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
|
@ -245,53 +251,28 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
|
|||
}
|
||||
|
||||
define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
|
||||
; SSE-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
|
||||
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; SSE-NEXT: call void @use1(i1 [[C2]])
|
||||
; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
|
||||
; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
|
||||
; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X3]], i32 2
|
||||
; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3
|
||||
; SSE-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
; SSE-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; SSE-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; SSE-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
|
||||
; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
|
||||
; SSE-NEXT: [[S5:%.*]] = select i1 [[OP_EXTRA]], i1 [[D1]], i1 false
|
||||
; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; SSE-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
; AVX-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
|
||||
; AVX-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; AVX-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
|
||||
; AVX-NEXT: call void @use1(i1 [[TMP4]])
|
||||
; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
|
||||
; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
|
||||
; AVX-NEXT: [[S1:%.*]] = select i1 [[TMP6]], i1 [[TMP5]], i1 false
|
||||
; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP4]], i1 false
|
||||
; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
|
||||
; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP7]], i1 false
|
||||
; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
|
||||
; AVX-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[TMP8]], i1 false
|
||||
; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
|
||||
; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP9]], i1 false
|
||||
; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
|
||||
; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP10]], i1 false
|
||||
; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
|
||||
; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP11]], i1 false
|
||||
; AVX-NEXT: ret i1 [[S7]]
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; CHECK-NEXT: call void @use1(i1 [[C2]])
|
||||
; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42
|
||||
; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
|
@ -317,49 +298,28 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
|
|||
}
|
||||
|
||||
define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
|
||||
; SSE-LABEL: @logical_and_icmp_clamp_extra_use_select(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
|
||||
; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; SSE-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[X]], i32 0
|
||||
; SSE-NEXT: [[C0:%.*]] = icmp slt i32 [[TMP4]], 42
|
||||
; SSE-NEXT: [[C1:%.*]] = icmp slt i32 [[TMP3]], 42
|
||||
; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP2]], 42
|
||||
; SSE-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 42>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 42>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
|
||||
; SSE-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17
|
||||
; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; SSE-NEXT: call void @use1(i1 [[S2]])
|
||||
; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
|
||||
; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP9]], i1 [[S2]], i1 false
|
||||
; SSE-NEXT: [[S7:%.*]] = select i1 [[OP_EXTRA]], i1 [[D3]], i1 false
|
||||
; SSE-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
; AVX-LABEL: @logical_and_icmp_clamp_extra_use_select(
|
||||
; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
|
||||
; AVX-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; AVX-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 42, i32 42, i32 42, i32 42, i32 17, i32 17, i32 17, i32 17>
|
||||
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
|
||||
; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
|
||||
; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
|
||||
; AVX-NEXT: [[S1:%.*]] = select i1 [[TMP5]], i1 [[TMP4]], i1 false
|
||||
; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
|
||||
; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[TMP6]], i1 false
|
||||
; AVX-NEXT: call void @use1(i1 [[S2]])
|
||||
; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
|
||||
; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP7]], i1 false
|
||||
; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
|
||||
; AVX-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[TMP8]], i1 false
|
||||
; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
|
||||
; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP9]], i1 false
|
||||
; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
|
||||
; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP10]], i1 false
|
||||
; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
|
||||
; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP11]], i1 false
|
||||
; AVX-NEXT: ret i1 [[S7]]
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42
|
||||
; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; CHECK-NEXT: call void @use1(i1 [[S2]])
|
||||
; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
|
@ -435,47 +395,25 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
|
|||
}
|
||||
|
||||
define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
|
||||
; SSE-LABEL: @logical_and_icmp_clamp_partial(
|
||||
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X0]], i32 0
|
||||
; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X1]], i32 1
|
||||
; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X2]], i32 2
|
||||
; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X0]], i32 3
|
||||
; SSE-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
|
||||
; SSE-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 42, i32 42, i32 42, i32 17>
|
||||
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
; SSE-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; SSE-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; SSE-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
|
||||
; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
|
||||
; SSE-NEXT: [[S5:%.*]] = select i1 [[TMP9]], i1 [[D1]], i1 false
|
||||
; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; SSE-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
; AVX-LABEL: @logical_and_icmp_clamp_partial(
|
||||
; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; AVX-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; AVX-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; AVX-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; AVX-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; AVX-NEXT: [[S4:%.*]] = select i1 [[S2]], i1 [[D0]], i1 false
|
||||
; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; AVX-NEXT: ret i1 [[S7]]
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp_partial(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[S2]], i1 [[D0]], i1 false
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
|
@ -500,44 +438,27 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
|
|||
}
|
||||
|
||||
define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
|
||||
; SSE-LABEL: @logical_and_icmp_clamp_pred_diff(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
|
||||
; SSE-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
|
||||
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
|
||||
; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP3]]
|
||||
; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
|
||||
; SSE-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
|
||||
; SSE-NEXT: [[S4:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
|
||||
; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
|
||||
; SSE-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[TMP8]], i1 false
|
||||
; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
|
||||
; SSE-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[TMP9]], i1 false
|
||||
; SSE-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
|
||||
; SSE-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[TMP10]], i1 false
|
||||
; SSE-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
; AVX-LABEL: @logical_and_icmp_clamp_pred_diff(
|
||||
; AVX-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; AVX-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; AVX-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; AVX-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; AVX-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; AVX-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; AVX-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; AVX-NEXT: [[C3:%.*]] = icmp ult i32 [[X3]], 42
|
||||
; AVX-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; AVX-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; AVX-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; AVX-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; AVX-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; AVX-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; AVX-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
|
||||
; AVX-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
|
||||
; AVX-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; AVX-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; AVX-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; AVX-NEXT: ret i1 [[S7]]
|
||||
; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
|
||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
|
||||
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
|
||||
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 42
|
||||
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42
|
||||
; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42
|
||||
; CHECK-NEXT: [[C3:%.*]] = icmp ult i32 [[X3]], 42
|
||||
; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17
|
||||
; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17
|
||||
; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17
|
||||
; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17
|
||||
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
|
||||
; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
|
||||
; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
|
||||
; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false
|
||||
; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false
|
||||
; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false
|
||||
; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false
|
||||
; CHECK-NEXT: ret i1 [[S7]]
|
||||
;
|
||||
%x0 = extractelement <4 x i32> %x, i32 0
|
||||
%x1 = extractelement <4 x i32> %x, i32 1
|
||||
|
|
Loading…
Reference in New Issue