forked from OSchip/llvm-project
[SLP] Pass VecPred argument to getCmpSelInstrCost.
Check if all compares in VL have the same predicate and pass it to getCmpSelInstrCost, to improve cost-modeling on targets that only support compare/select combinations for certain uniform predicates.

This leads to additional vectorization in some cases:

```
Same hash: 217 (filtered out)
Remaining: 19
Metric: SLP.NumVectorInstructions

Program                                        base     slp2     diff
test-suite...marks/SciMark2-C/scimark2.test    11.00    26.00    136.4%
test-suite...T2006/445.gobmk/445.gobmk.test    79.00    135.00   70.9%
test-suite...ediabench/gsm/toast/toast.test    54.00    71.00    31.5%
test-suite...telecomm-gsm/telecomm-gsm.test    54.00    71.00    31.5%
test-suite...CI_Purple/SMG2000/smg2000.test    426.00   542.00   27.2%
test-suite...ch/g721/g721encode/encode.test    30.00    24.00    -20.0%
test-suite...000/186.crafty/186.crafty.test    116.00   138.00   19.0%
test-suite...ications/JM/ldecod/ldecod.test    697.00   765.00   9.8%
test-suite...6/464.h264ref/464.h264ref.test    822.00   886.00   7.8%
test-suite...chmarks/MallocBench/gs/gs.test    154.00   162.00   5.2%
test-suite...nsumer-lame/consumer-lame.test    621.00   651.00   4.8%
test-suite...lications/ClamAV/clamscan.test    223.00   231.00   3.6%
test-suite...marks/7zip/7zip-benchmark.test    680.00   695.00   2.2%
test-suite...CFP2000/177.mesa/177.mesa.test    2121.00  2129.00  0.4%
test-suite...:: External/Povray/povray.test    2406.00  2412.00  0.2%
test-suite...TimberWolfMC/timberwolfmc.test    634.00   634.00   0.0%
test-suite...CFP2006/433.milc/433.milc.test    1036.00  1036.00  0.0%
test-suite.../Benchmarks/nbench/nbench.test    321.00   321.00   0.0%
test-suite...ctions-flt/Reductions-flt.test    NaN      5.00     nan%
```

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D90124
This commit is contained in:
parent
3bdeb2ac2e
commit
d9cbf39a37
|
@ -3547,9 +3547,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost =
|
||||
TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
|
||||
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
|
||||
|
||||
// Check if all entries in VL are either compares or selects with compares
|
||||
// as condition that have the same predicates.
|
||||
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
|
||||
bool First = true;
|
||||
for (auto *V : VL) {
|
||||
CmpInst::Predicate CurrentPred;
|
||||
auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
|
||||
if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
|
||||
!match(V, MatchCmp)) ||
|
||||
(!First && VecPred != CurrentPred)) {
|
||||
VecPred = CmpInst::BAD_ICMP_PREDICATE;
|
||||
break;
|
||||
}
|
||||
First = false;
|
||||
VecPred = CurrentPred;
|
||||
}
|
||||
|
||||
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
|
||||
VecPred, CostKind, VL0);
|
||||
// Check if it is possible and profitable to use min/max for selects in
|
||||
// VL.
|
||||
//
|
||||
|
|
|
@ -15,7 +15,7 @@ target triple = "aarch64--linux"
|
|||
; YAML-NEXT: Function: test_select
|
||||
; YAML-NEXT: Args:
|
||||
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; YAML-NEXT: - Cost: '-8'
|
||||
; YAML-NEXT: - Cost: '-20'
|
||||
; YAML-NEXT: - String: ' and with tree size '
|
||||
; YAML-NEXT: - TreeSize: '8'
|
||||
|
||||
|
@ -244,7 +244,7 @@ for.end: ; preds = %for.end.loopexit, %
|
|||
; YAML-NEXT: Function: test_unrolled_select
|
||||
; YAML-NEXT: Args:
|
||||
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
|
||||
; YAML-NEXT: - Cost: '-31'
|
||||
; YAML-NEXT: - Cost: '-37'
|
||||
; YAML-NEXT: - String: ' and with tree size '
|
||||
; YAML-NEXT: - TreeSize: '10'
|
||||
|
||||
|
|
|
@ -165,19 +165,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of umin.
|
||||
; There is no <2 x i64> version of umin, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_umin_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_umin_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -305,19 +304,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of umin.
|
||||
; There is no <2 x i64> version of umin, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_umin_ule_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -444,19 +442,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of smin.
|
||||
; There is no <2 x i64> version of smin, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_smin_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_smin_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -583,19 +580,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of smin.
|
||||
; There is no <2 x i64> version of smin, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_smin_sle_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -721,19 +717,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of umax.
|
||||
; There is no <2 x i64> version of umax, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_umax_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_umax_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -860,19 +855,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of umax.
|
||||
; There is no <2 x i64> version of umax, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_umax_uge_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -999,19 +993,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of smax.
|
||||
; There is no <2 x i64> version of smax, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_smax_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_smax_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -1139,19 +1132,18 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
; There is no <2 x i64> version of smax.
|
||||
; There is no <2 x i64> version of smax, but we can efficiently lower
|
||||
; compare/select pairs with uniform predicates.
|
||||
define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_smax_sge_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
|
@ -193,45 +193,27 @@ entry:
|
|||
define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) {
|
||||
; CHECK-LABEL: @select_uniform_ugt_8xi8(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i8, i8* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i8 [[L_0]], -1
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i8 [[L_0]], i8 [[X:%.*]]
|
||||
; CHECK-NEXT: store i8 [[S_0]], i8* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i8, i8* [[GEP_1]], align 1
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i8 [[L_1]], -1
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i8 [[L_1]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_1]], i8* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1
|
||||
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2
|
||||
; CHECK-NEXT: [[L_2:%.*]] = load i8, i8* [[GEP_2]], align 1
|
||||
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i8 [[L_2]], -1
|
||||
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i8 [[L_2]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_2]], i8* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3
|
||||
; CHECK-NEXT: [[L_3:%.*]] = load i8, i8* [[GEP_3]], align 1
|
||||
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i8 [[L_3]], -1
|
||||
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i8 [[L_3]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_3]], i8* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4
|
||||
; CHECK-NEXT: [[L_4:%.*]] = load i8, i8* [[GEP_4]], align 1
|
||||
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i8 [[L_4]], -1
|
||||
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i8 [[L_4]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_4]], i8* [[GEP_4]], align 2
|
||||
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5
|
||||
; CHECK-NEXT: [[L_5:%.*]] = load i8, i8* [[GEP_5]], align 1
|
||||
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i8 [[L_5]], -1
|
||||
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i8 [[L_5]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_5]], i8* [[GEP_5]], align 2
|
||||
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6
|
||||
; CHECK-NEXT: [[L_6:%.*]] = load i8, i8* [[GEP_6]], align 1
|
||||
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i8 [[L_6]], -1
|
||||
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i8 [[L_6]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_6]], i8* [[GEP_6]], align 2
|
||||
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7
|
||||
; CHECK-NEXT: [[L_7:%.*]] = load i8, i8* [[GEP_7]], align 1
|
||||
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i8 [[L_7]], -1
|
||||
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i8 [[L_7]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_7]], i8* [[GEP_7]], align 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
|
||||
; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -287,50 +269,34 @@ entry:
|
|||
define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) {
|
||||
; CHECK-LABEL: @select_uniform_ugt_16xi8(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i8, i8* [[PTR:%.*]], align 1
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i8 [[L_0]], -1
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i8 [[L_0]], i8 [[X:%.*]]
|
||||
; CHECK-NEXT: store i8 [[S_0]], i8* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i8, i8* [[GEP_1]], align 1
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i8 [[L_1]], -1
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i8 [[L_1]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_1]], i8* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i8 1
|
||||
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 2
|
||||
; CHECK-NEXT: [[L_2:%.*]] = load i8, i8* [[GEP_2]], align 1
|
||||
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i8 [[L_2]], -1
|
||||
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i8 [[L_2]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_2]], i8* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 3
|
||||
; CHECK-NEXT: [[L_3:%.*]] = load i8, i8* [[GEP_3]], align 1
|
||||
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i8 [[L_3]], -1
|
||||
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i8 [[L_3]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_3]], i8* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 4
|
||||
; CHECK-NEXT: [[L_4:%.*]] = load i8, i8* [[GEP_4]], align 1
|
||||
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i8 [[L_4]], -1
|
||||
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i8 [[L_4]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_4]], i8* [[GEP_4]], align 2
|
||||
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 5
|
||||
; CHECK-NEXT: [[L_5:%.*]] = load i8, i8* [[GEP_5]], align 1
|
||||
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i8 [[L_5]], -1
|
||||
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i8 [[L_5]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_5]], i8* [[GEP_5]], align 2
|
||||
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 6
|
||||
; CHECK-NEXT: [[L_6:%.*]] = load i8, i8* [[GEP_6]], align 1
|
||||
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i8 [[L_6]], -1
|
||||
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i8 [[L_6]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_6]], i8* [[GEP_6]], align 2
|
||||
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 7
|
||||
; CHECK-NEXT: [[L_7:%.*]] = load i8, i8* [[GEP_7]], align 1
|
||||
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i8 [[L_7]], -1
|
||||
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i8 [[L_7]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_7]], i8* [[GEP_7]], align 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> undef, i8 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
|
||||
; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
|
||||
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8
|
||||
; CHECK-NEXT: [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1
|
||||
; CHECK-NEXT: [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1
|
||||
; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[L_0]], i8 [[X]]
|
||||
; CHECK-NEXT: store i8 [[S_0]], i8* [[GEP_8]], align 2
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP13]], i8 [[X]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: store i8 [[TMP14]], i8* [[GEP_8]], align 2
|
||||
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9
|
||||
; CHECK-NEXT: [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1
|
||||
; CHECK-NEXT: [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1
|
||||
|
@ -471,25 +437,19 @@ entry:
|
|||
define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) {
|
||||
; CHECK-LABEL: @select_uniform_ugt_4xi16(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 [[X:%.*]]
|
||||
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
|
||||
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
|
||||
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383
|
||||
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
|
||||
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383
|
||||
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
|
||||
; CHECK-NEXT: store <4 x i16> [[TMP7]], <4 x i16>* [[TMP8]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -522,45 +482,27 @@ entry:
|
|||
define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) {
|
||||
; CHECK-LABEL: @select_uniform_ult_8xi16(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 [[X:%.*]]
|
||||
; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
|
||||
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
|
||||
; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383
|
||||
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
|
||||
; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383
|
||||
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
|
||||
; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
|
||||
; CHECK-NEXT: [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383
|
||||
; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
|
||||
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
|
||||
; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
|
||||
; CHECK-NEXT: [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383
|
||||
; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
|
||||
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
|
||||
; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
|
||||
; CHECK-NEXT: [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
|
||||
; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
|
||||
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
|
||||
; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
|
||||
; CHECK-NEXT: [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
|
||||
; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 [[X]]
|
||||
; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[X]], i32 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[X]], i32 5
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[X]], i32 6
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[X]], i32 7
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
|
||||
; CHECK-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* [[TMP12]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -616,15 +558,15 @@ entry:
|
|||
define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) {
|
||||
; CHECK-LABEL: @select_uniform_eq_2xi32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 [[X:%.*]]
|
||||
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 [[X]]
|
||||
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 16383, i32 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>*
|
||||
; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -645,25 +587,19 @@ entry:
|
|||
define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) {
|
||||
; CHECK-LABEL: @select_uniform_eq_4xi32(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp eq i32 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 [[X:%.*]]
|
||||
; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 [[X]]
|
||||
; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
|
||||
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
|
||||
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
|
||||
; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32 [[L_2]], 16383
|
||||
; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 [[X]]
|
||||
; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 2
|
||||
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
|
||||
; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
|
||||
; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 [[L_3]], 16383
|
||||
; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 [[X]]
|
||||
; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
@ -695,15 +631,15 @@ entry:
|
|||
define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @select_uniform_ne_2xi64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_0:%.*]] = icmp ne i64 [[L_0]], 16383
|
||||
; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 [[X:%.*]]
|
||||
; CHECK-NEXT: store i64 [[S_0]], i64* [[PTR]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
|
||||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp ne i64 [[L_1]], 16383
|
||||
; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 [[X]]
|
||||
; CHECK-NEXT: store i64 [[S_1]], i64* [[GEP_1]], align 2
|
||||
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], <i64 16383, i64 16383>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 2
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue