[SLP] Pass VecPred argument to getCmpSelInstrCost.

Check if all compares in VL have the same predicate and pass it to
getCmpSelInstrCost, to improve cost-modeling on targets that only
support compare/select combinations for certain uniform predicates.

This leads to additional vectorization in some cases:

```
Same hash: 217 (filtered out)
Remaining: 19
Metric: SLP.NumVectorInstructions

Program                                        base    slp2    diff
 test-suite...marks/SciMark2-C/scimark2.test    11.00   26.00  136.4%
 test-suite...T2006/445.gobmk/445.gobmk.test    79.00  135.00  70.9%
 test-suite...ediabench/gsm/toast/toast.test    54.00   71.00  31.5%
 test-suite...telecomm-gsm/telecomm-gsm.test    54.00   71.00  31.5%
 test-suite...CI_Purple/SMG2000/smg2000.test   426.00  542.00  27.2%
 test-suite...ch/g721/g721encode/encode.test    30.00   24.00  -20.0%
 test-suite...000/186.crafty/186.crafty.test   116.00  138.00  19.0%
 test-suite...ications/JM/ldecod/ldecod.test   697.00  765.00   9.8%
 test-suite...6/464.h264ref/464.h264ref.test   822.00  886.00   7.8%
 test-suite...chmarks/MallocBench/gs/gs.test   154.00  162.00   5.2%
 test-suite...nsumer-lame/consumer-lame.test   621.00  651.00   4.8%
 test-suite...lications/ClamAV/clamscan.test   223.00  231.00   3.6%
 test-suite...marks/7zip/7zip-benchmark.test   680.00  695.00   2.2%
 test-suite...CFP2000/177.mesa/177.mesa.test   2121.00 2129.00  0.4%
 test-suite...:: External/Povray/povray.test   2406.00 2412.00  0.2%
 test-suite...TimberWolfMC/timberwolfmc.test   634.00  634.00   0.0%
 test-suite...CFP2006/433.milc/433.milc.test   1036.00 1036.00  0.0%
 test-suite.../Benchmarks/nbench/nbench.test   321.00  321.00   0.0%
 test-suite...ctions-flt/Reductions-flt.test    NaN      5.00   nan%
```

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D90124
Florian Hahn 2020-11-03 09:55:47 +00:00
parent 3bdeb2ac2e
commit d9cbf39a37
4 changed files with 183 additions and 238 deletions
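The core of the patch is the scan in BoUpSLP::getEntryCost shown in the first hunk below: walk the bundle once and, only if every entry is a compare (or a select whose condition is a compare) with a single common predicate, pass that predicate to getCmpSelInstrCost; any mismatch falls back to CmpInst::BAD_ICMP_PREDICATE. The same idea, pulled out into a standalone helper purely for illustration (the name getCommonCmpPredicate is invented for this sketch and is not part of the patch, whose logic stays inline at the call site):

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Return the predicate shared by every compare (or select-of-compare) in VL,
// or BAD_ICMP_PREDICATE if the bundle is mixed or contains anything else.
CmpInst::Predicate getCommonCmpPredicate(ArrayRef<Value *> VL) {
  CmpInst::Predicate Common = CmpInst::BAD_ICMP_PREDICATE;
  for (Value *V : VL) {
    CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
    auto MatchCmp = m_Cmp(P, m_Value(), m_Value());
    // Accept a bare compare, or a select whose condition is a compare.
    if (!match(V, MatchCmp) &&
        !match(V, m_Select(MatchCmp, m_Value(), m_Value())))
      return CmpInst::BAD_ICMP_PREDICATE;
    // Every entry has to agree on the predicate.
    if (Common != CmpInst::BAD_ICMP_PREDICATE && Common != P)
      return CmpInst::BAD_ICMP_PREDICATE;
    Common = P;
  }
  return Common;
}
```

With a single known predicate, targets whose compare/select lowering depends on the predicate can report a more accurate (usually lower) vector cost than the conservative BAD_ICMP_PREDICATE query used before.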


@@ -3547,9 +3547,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost =
TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
// Check if all entries in VL are either compares or selects with compares
// as condition that have the same predicates.
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
bool First = true;
for (auto *V : VL) {
CmpInst::Predicate CurrentPred;
auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
!match(V, MatchCmp)) ||
(!First && VecPred != CurrentPred)) {
VecPred = CmpInst::BAD_ICMP_PREDICATE;
break;
}
First = false;
VecPred = CurrentPred;
}
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
VecPred, CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects in
// VL.
//
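To make the matching behaviour concrete, here is a small driver that builds the kind of scalar compare/select pair the tests below start from and feeds it to the hypothetical getCommonCmpPredicate helper sketched above. This is only an illustration, not part of the commit; the function name f and the constant 16383 are arbitrary.

```cpp
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("cmp-select-demo", Ctx);
  IRBuilder<> B(Ctx);

  // void f(i64 %a, i64 %b)
  auto *FTy = FunctionType::get(B.getVoidTy(),
                                {B.getInt64Ty(), B.getInt64Ty()},
                                /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  Value *A = F->getArg(0), *C = F->getArg(1);
  Value *K = B.getInt64(16383);

  // Two scalar compare/select pairs using the same (ult) predicate: the
  // shape of bundle the SLP scan is looking for.
  Value *S0 = B.CreateSelect(B.CreateICmpULT(A, K), A, K);
  Value *S1 = B.CreateSelect(B.CreateICmpULT(C, K), C, K);
  B.CreateRetVoid();

  Value *Bundle[] = {S0, S1};
  // Prints "common predicate: ult"; mixing in an icmp ule pair instead would
  // yield BAD_ICMP_PREDICATE and the conservative cost query.
  outs() << "common predicate: "
         << CmpInst::getPredicateName(getCommonCmpPredicate(Bundle)) << "\n";
  return 0;
}
```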


@@ -15,7 +15,7 @@ target triple = "aarch64--linux"
; YAML-NEXT: Function: test_select
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-8'
; YAML-NEXT: - Cost: '-20'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '8'
@@ -244,7 +244,7 @@ for.end: ; preds = %for.end.loopexit, %
; YAML-NEXT: Function: test_unrolled_select
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-31'
; YAML-NEXT: - Cost: '-37'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '10'


@@ -165,19 +165,18 @@ entry:
ret void
}
; There is no <2 x i64> version of umin.
; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_umin_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -305,19 +304,18 @@ entry:
ret void
}
; There is no <2 x i64> version of umin.
; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_ule_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -444,19 +442,18 @@ entry:
ret void
}
; There is no <2 x i64> version of smin.
; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smin_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -583,19 +580,18 @@ entry:
ret void
}
; There is no <2 x i64> version of smin.
; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_sle_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -721,19 +717,18 @@ entry:
ret void
}
; There is no <2 x i64> version of umax.
; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_umax_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -860,19 +855,18 @@ entry:
ret void
}
; There is no <2 x i64> version of umax.
; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_uge_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -999,19 +993,18 @@ entry:
ret void
}
; There is no <2 x i64> version of smax.
; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smax_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -1139,19 +1132,18 @@ entry:
ret void
}
; There is no <2 x i64> version of smax.
; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_sge_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
entry:


@@ -193,45 +193,27 @@ entry:
define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) {
; CHECK-LABEL: @select_uniform_ugt_8xi8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i8 [[L_0]], -1
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i8 [[L_0]], i8 [[X:%.*]]
; CHECK-NEXT: store i8 [[S_0]], i8* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 1
; CHECK-NEXT: [[L_1:%.*]] = load i8, i8* [[GEP_1]], align 1
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i8 [[L_1]], -1
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i8 [[L_1]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_1]], i8* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2
; CHECK-NEXT: [[L_2:%.*]] = load i8, i8* [[GEP_2]], align 1
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i8 [[L_2]], -1
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i8 [[L_2]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_2]], i8* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3
; CHECK-NEXT: [[L_3:%.*]] = load i8, i8* [[GEP_3]], align 1
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i8 [[L_3]], -1
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i8 [[L_3]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_3]], i8* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4
; CHECK-NEXT: [[L_4:%.*]] = load i8, i8* [[GEP_4]], align 1
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i8 [[L_4]], -1
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i8 [[L_4]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_4]], i8* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5
; CHECK-NEXT: [[L_5:%.*]] = load i8, i8* [[GEP_5]], align 1
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i8 [[L_5]], -1
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i8 [[L_5]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_5]], i8* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6
; CHECK-NEXT: [[L_6:%.*]] = load i8, i8* [[GEP_6]], align 1
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i8 [[L_6]], -1
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i8 [[L_6]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_6]], i8* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7
; CHECK-NEXT: [[L_7:%.*]] = load i8, i8* [[GEP_7]], align 1
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i8 [[L_7]], -1
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i8 [[L_7]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_7]], i8* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -287,50 +269,34 @@ entry:
define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) {
; CHECK-LABEL: @select_uniform_ugt_16xi8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i8, i8* [[PTR:%.*]], align 1
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i8 [[L_0]], -1
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i8 [[L_0]], i8 [[X:%.*]]
; CHECK-NEXT: store i8 [[S_0]], i8* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 1
; CHECK-NEXT: [[L_1:%.*]] = load i8, i8* [[GEP_1]], align 1
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i8 [[L_1]], -1
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i8 [[L_1]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_1]], i8* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2
; CHECK-NEXT: [[L_2:%.*]] = load i8, i8* [[GEP_2]], align 1
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i8 [[L_2]], -1
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i8 [[L_2]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_2]], i8* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3
; CHECK-NEXT: [[L_3:%.*]] = load i8, i8* [[GEP_3]], align 1
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i8 [[L_3]], -1
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i8 [[L_3]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_3]], i8* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4
; CHECK-NEXT: [[L_4:%.*]] = load i8, i8* [[GEP_4]], align 1
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i8 [[L_4]], -1
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i8 [[L_4]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_4]], i8* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5
; CHECK-NEXT: [[L_5:%.*]] = load i8, i8* [[GEP_5]], align 1
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i8 [[L_5]], -1
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i8 [[L_5]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_5]], i8* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6
; CHECK-NEXT: [[L_6:%.*]] = load i8, i8* [[GEP_6]], align 1
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i8 [[L_6]], -1
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i8 [[L_6]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_6]], i8* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7
; CHECK-NEXT: [[L_7:%.*]] = load i8, i8* [[GEP_7]], align 1
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i8 [[L_7]], -1
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i8 [[L_7]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_7]], i8* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8
; CHECK-NEXT: [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1
; CHECK-NEXT: [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1
; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[L_0]], i8 [[X]]
; CHECK-NEXT: store i8 [[S_0]], i8* [[GEP_8]], align 2
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP13]], i8 [[X]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP11]], i32 0
; CHECK-NEXT: store i8 [[TMP14]], i8* [[GEP_8]], align 2
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9
; CHECK-NEXT: [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1
; CHECK-NEXT: [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1
@@ -471,25 +437,19 @@ entry:
define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_uniform_ugt_4xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 [[X:%.*]]
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
; CHECK-NEXT: store <4 x i16> [[TMP7]], <4 x i16>* [[TMP8]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -522,45 +482,27 @@ entry:
define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_uniform_ult_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 [[X:%.*]]
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 [[X]]
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[X]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[X]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[X]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[X]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* [[TMP12]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -616,15 +558,15 @@ entry:
define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_uniform_eq_2xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 [[X:%.*]]
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 [[X]]
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -645,25 +587,19 @@ entry:
define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_uniform_eq_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 [[X:%.*]]
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 [[X]]
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32 [[L_2]], 16383
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 [[X]]
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 [[L_3]], 16383
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 [[X]]
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 2
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -695,15 +631,15 @@ entry:
define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) {
; CHECK-LABEL: @select_uniform_ne_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ne i64 [[L_0]], 16383
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 [[X:%.*]]
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ne i64 [[L_1]], 16383
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 [[X]]
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 2
; CHECK-NEXT: ret void
;
entry: