Reland "[SLP] Consider alternatives for cost of select instructions."

This reverts the revert commit a1b53db324.

This patch includes a fix for a reported issue, caused by
matchSelectPattern returning UMIN for selects of pointers in
some cases by looking through some connected casts.

For now, ensure integer intrinsics are only returned for selects of
ints or int vectors.
This commit is contained in:
Florian Hahn 2020-10-31 10:51:19 +00:00
parent b231396122
commit 799033d8c5
5 changed files with 221 additions and 401 deletions

View File

@ -728,6 +728,14 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
/// Check if the values in \p VL are select instructions that can be converted
/// to a min or max (vector) intrinsic. Only integer min/max patterns (smin,
/// umin, smax, umax) on int or int-vector selects are handled, and every value
/// in \p VL must match the same pattern. Returns the intrinsic ID, if such a
/// conversion is possible, together with a bool indicating whether all select
/// conditions are only used by the selects. Otherwise returns
/// {Intrinsic::not_intrinsic, false}.
std::pair<Intrinsic::ID, bool>
canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL);
/// Return true if RHS is known to be implied true by LHS. Return false if
/// RHS is known to be implied false by LHS. Otherwise, return None if no
/// implication can be made.

View File

@ -5991,6 +5991,46 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible. The second
  // member of the result is true when the condition of every select has a
  // single use, i.e. the compares feeding the selects have no other users.
  // TODO: Support floating point min/max.

  // An empty list contains nothing to convert. Without this guard all_of()
  // below is vacuously true, the flavor stays SPF_UNKNOWN and the switch would
  // run into llvm_unreachable.
  if (VL.empty())
    return {Intrinsic::not_intrinsic, false};

  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  const bool AllSameIntMinMax =
      all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        // Only integer min/max flavors qualify. The explicit type check is
        // needed because matchSelectPattern may look through casts and report
        // an integer flavor (e.g. UMIN) for a select of pointers.
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
            CurrentPattern.Flavor == SPF_FMINNUM ||
            CurrentPattern.Flavor == SPF_FMAXNUM ||
            !I->getType()->isIntOrIntVectorTy())
          return false;
        // Every value in VL must use the same flavor as the ones seen so far.
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      });
  if (!AllSameIntMinMax)
    return {Intrinsic::not_intrinsic, false};

  // VL is non-empty and every element matched, so the flavor is one of the
  // four integer min/max flavors here.
  switch (SelectPattern.Flavor) {
  case SPF_SMIN:
    return {Intrinsic::smin, AllCmpSingleUse};
  case SPF_UMIN:
    return {Intrinsic::umin, AllCmpSingleUse};
  case SPF_SMAX:
    return {Intrinsic::smax, AllCmpSingleUse};
  case SPF_UMAX:
    return {Intrinsic::umax, AllCmpSingleUse};
  default:
    llvm_unreachable("unexpected select pattern flavor");
  }
}
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
const Value *RHS, const DataLayout &DL,

View File

@ -3549,6 +3549,21 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects in
// VL: when the selects can be converted to a min/max intrinsic, cost the
// vector intrinsic as an alternative to the vector select.
auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
{VecTy, VecTy});
int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, the compares will be
// dead and we can adjust the cost by removing their cost.
if (IntrinsicAndUse.second)
IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
MaskTy, CostKind);
// Keep the cheaper of the select-based and intrinsic-based vector costs.
VecCost = std::min(VecCost, IntrinsicCost);
}
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::FNeg:

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S %s | FileCheck %s
; RUN: opt -passes='slp-vectorizer' -S %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"
@ -8,45 +8,19 @@ target triple = "arm64-apple-ios5.0.0"
define void @select_umin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -102,25 +76,15 @@ entry:
define void @select_umin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -235,45 +199,19 @@ entry:
define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_ule_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ule i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ule i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ule i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ule i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -329,25 +267,15 @@ entry:
define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_ule_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -410,45 +338,19 @@ entry:
define void @select_smin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp slt i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp slt i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp slt i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp slt i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -504,25 +406,15 @@ entry:
define void @select_smin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -585,45 +477,19 @@ entry:
define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_sle_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp sle i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp sle i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp sle i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp sle i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -679,25 +545,15 @@ entry:
define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_sle_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -759,45 +615,19 @@ entry:
define void @select_umax_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -853,25 +683,15 @@ entry:
define void @select_umax_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -934,45 +754,19 @@ entry:
define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_uge_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp uge i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp uge i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp uge i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp uge i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -1028,25 +822,15 @@ entry:
define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_uge_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -1109,45 +893,19 @@ entry:
define void @select_smax_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp sgt i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp sgt i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp sgt i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp sgt i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -1203,25 +961,15 @@ entry:
define void @select_smax_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@ -1285,45 +1033,19 @@ entry:
define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_sge_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp sge i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp sge i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp sge i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp sge i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
entry:
@ -1379,25 +1101,15 @@ entry:
define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_sge_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:

View File

@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"
; Make sure we do not crash while computing the cost for @test.
;
; Regression test for the SLP vectorizer's cost computation on selects whose
; operands are pointers. Both selects in the body pick the lower of two float*
; values using an unsigned pointer compare, and each result is then bitcast to
; i8*. They have the shape of an unsigned-min idiom but are not integer
; selects, so they must not be costed as integer min/max intrinsics.
; The assertions mirror the input body instruction for instruction, i.e. the
; pass is expected to leave this function unchanged.
define i1 @test(float* %p1, float* %p2, i8* %p3, i1 %c) #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr float, float* [[P1:%.*]], i32 0
; CHECK-NEXT:    [[L0:%.*]] = icmp ult float* [[P2:%.*]], [[SCEVGEP21]]
; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[L0]], float* [[P2]], float* [[SCEVGEP21]]
; CHECK-NEXT:    [[UMIN22:%.*]] = bitcast float* [[UMIN]] to i8*
; CHECK-NEXT:    [[SCEVGEP31:%.*]] = getelementptr float, float* [[P1]], i32 1
; CHECK-NEXT:    [[L1:%.*]] = icmp ult float* [[SCEVGEP31]], [[P2]]
; CHECK-NEXT:    [[UMIN33:%.*]] = select i1 [[L1]], float* [[SCEVGEP31]], float* [[P2]]
; CHECK-NEXT:    [[UMIN3334:%.*]] = bitcast float* [[UMIN33]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i8* [[P3:%.*]], [[UMIN22]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[C:%.*]]
; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[C]]
; CHECK-NEXT:    [[BOUND042:%.*]] = icmp ugt i8* [[P3]], [[UMIN3334]]
; CHECK-NEXT:    [[FOUND_CONFLICT44:%.*]] = and i1 [[BOUND042]], [[C]]
; CHECK-NEXT:    [[CONFLICT_RDX45:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT44]]
; CHECK-NEXT:    [[CONFLICT_RDX49:%.*]] = or i1 [[CONFLICT_RDX45]], [[C]]
; CHECK-NEXT:    ret i1 [[CONFLICT_RDX49]]
;
  ; First pointer select: the lower of %p2 and %p1 (GEP with index 0), compared
  ; as unsigned pointers, then viewed as i8*.
  %scevgep21 = getelementptr float, float* %p1, i32 0
  %l0 = icmp ult float* %p2, %scevgep21
  %umin = select i1 %l0, float* %p2, float* %scevgep21
  %umin22 = bitcast float* %umin to i8*
  ; Second pointer select: same shape, but against the address one float past
  ; %p1 and with the compare/select operands in the opposite order.
  %scevgep31 = getelementptr float, float* %p1, i32 1
  %l1 = icmp ult float* %scevgep31, %p2
  %umin33 = select i1 %l1, float* %scevgep31, float* %p2
  %umin3334 = bitcast float* %umin33 to i8*
  ; Compare %p3 against each selected pointer and fold the results together
  ; with %c through an and/or chain; the final `or` is the return value.
  %bound0 = icmp ugt i8* %p3, %umin22
  %found.conflict = and i1 %bound0, %c
  %conflict.rdx = or i1 %found.conflict, %c
  %bound042 = icmp ugt i8* %p3, %umin3334
  %found.conflict44 = and i1 %bound042, %c
  %conflict.rdx45 = or i1 %conflict.rdx, %found.conflict44
  %conflict.rdx49 = or i1 %conflict.rdx45, %c
  ret i1 %conflict.rdx49
}
attributes #0 = { "target-cpu"="pentium-m" }