forked from OSchip/llvm-project
[CostModel][X86] Add CTLZ_ZERO_UNDEF/CTTZ_ZERO_UNDEF cost handling
Without LZCNT/BMI, the *_ZERO_UNDEF costs are cheaper as they can avoid the zero handling.
This commit is contained in:
parent
dd61726d5b
commit
d90a42d64c
|
@ -3804,7 +3804,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
{ ISD::BITREVERSE, MVT::i64, { 14 } },
|
||||
{ ISD::BSWAP, MVT::i64, { 1 } },
|
||||
{ ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
|
||||
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1 } }, // BSR+XOR
|
||||
{ ISD::CTTZ, MVT::i64, { 3 } }, // TEST+BSF+CMOV/BRANCH
|
||||
{ ISD::CTTZ_ZERO_UNDEF, MVT::i64,{ 1 } }, // BSF
|
||||
{ ISD::CTPOP, MVT::i64, { 10, 6, 19, 19 } },
|
||||
{ ISD::ROTL, MVT::i64, { 1 } },
|
||||
{ ISD::ROTR, MVT::i64, { 1 } },
|
||||
|
@ -3825,9 +3827,15 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
{ ISD::CTLZ, MVT::i32, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
|
||||
{ ISD::CTLZ, MVT::i16, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
|
||||
{ ISD::CTLZ, MVT::i8, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
|
||||
{ ISD::CTLZ_ZERO_UNDEF, MVT::i32,{ 1 } }, // BSR+XOR
|
||||
{ ISD::CTLZ_ZERO_UNDEF, MVT::i16,{ 2 } }, // BSR+XOR
|
||||
{ ISD::CTLZ_ZERO_UNDEF, MVT::i8, { 2 } }, // BSR+XOR
|
||||
{ ISD::CTTZ, MVT::i32, { 3 } }, // TEST+BSF+CMOV/BRANCH
|
||||
{ ISD::CTTZ, MVT::i16, { 3 } }, // TEST+BSF+CMOV/BRANCH
|
||||
{ ISD::CTTZ, MVT::i8, { 3 } }, // TEST+BSF+CMOV/BRANCH
|
||||
{ ISD::CTTZ_ZERO_UNDEF, MVT::i32,{ 1 } }, // BSF
|
||||
{ ISD::CTTZ_ZERO_UNDEF, MVT::i16,{ 2 } }, // BSF
|
||||
{ ISD::CTTZ_ZERO_UNDEF, MVT::i8, { 2 } }, // BSF
|
||||
{ ISD::CTPOP, MVT::i32, { 8, 7, 15, 15 } },
|
||||
{ ISD::CTPOP, MVT::i16, { 9, 8, 17, 17 } },
|
||||
{ ISD::CTPOP, MVT::i8, { 7, 6, 13, 13 } },
|
||||
|
@ -3869,14 +3877,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
break;
|
||||
case Intrinsic::ctlz:
|
||||
ISD = ISD::CTLZ;
|
||||
// TODO: Handle CTLZ_ZERO_UNDEF.
|
||||
break;
|
||||
case Intrinsic::ctpop:
|
||||
ISD = ISD::CTPOP;
|
||||
break;
|
||||
case Intrinsic::cttz:
|
||||
ISD = ISD::CTTZ;
|
||||
// TODO: Handle CTTZ_ZERO_UNDEF.
|
||||
break;
|
||||
case Intrinsic::fshl:
|
||||
ISD = ISD::FSHL;
|
||||
|
@ -3969,6 +3975,16 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
return LT.first * Cost;
|
||||
}
|
||||
|
||||
// Without BMI/LZCNT see if we're only looking for a *_ZERO_UNDEF cost.
|
||||
if (((ISD == ISD::CTTZ && !ST->hasBMI()) ||
|
||||
(ISD == ISD::CTLZ && !ST->hasLZCNT())) &&
|
||||
!MTy.isVector() && !ICA.isTypeBasedOnly()) {
|
||||
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
|
||||
if (auto *Cst = dyn_cast<ConstantInt>(Args[1]))
|
||||
if (Cst->isAllOnesValue())
|
||||
ISD = ISD == ISD::CTTZ ? ISD::CTTZ_ZERO_UNDEF : ISD::CTLZ_ZERO_UNDEF;
|
||||
}
|
||||
|
||||
// FSQRT is a single instruction.
|
||||
if (ISD == ISD::FSQRT && CostKind == TTI::TCK_CodeSize)
|
||||
return LT.first;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,NOLZCNT
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=SSE2,LZCNT
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-lzcnt,+sse2 | FileCheck %s -check-prefixes=NOLZCNT
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+lzcnt,+sse2 | FileCheck %s -check-prefixes=LZCNT,SSE2
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+lzcnt,+sse4.2 | FileCheck %s -check-prefixes=LZCNT,SSE42
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+lzcnt,+avx | FileCheck %s -check-prefixes=LZCNT,AVX1
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+lzcnt,+avx2 | FileCheck %s -check-prefixes=LZCNT,AVX2
|
||||
|
@ -30,7 +30,7 @@ define i64 @var_ctlz_i64(i64 %a) {
|
|||
|
||||
define i64 @var_ctlz_i64u(i64 %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_i64u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctlz
|
||||
;
|
||||
; LZCNT-LABEL: 'var_ctlz_i64u'
|
||||
|
@ -56,7 +56,7 @@ define i32 @var_ctlz_i32(i32 %a) {
|
|||
|
||||
define i32 @var_ctlz_i32u(i32 %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_i32u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctlz
|
||||
;
|
||||
; LZCNT-LABEL: 'var_ctlz_i32u'
|
||||
|
@ -82,7 +82,7 @@ define i16 @var_ctlz_i16(i16 %a) {
|
|||
|
||||
define i16 @var_ctlz_i16u(i16 %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_i16u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctlz
|
||||
;
|
||||
; LZCNT-LABEL: 'var_ctlz_i16u'
|
||||
|
@ -108,7 +108,7 @@ define i8 @var_ctlz_i8(i8 %a) {
|
|||
|
||||
define i8 @var_ctlz_i8u(i8 %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_i8u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctlz
|
||||
;
|
||||
; LZCNT-LABEL: 'var_ctlz_i8u'
|
||||
|
@ -137,6 +137,10 @@ declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
|
|||
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
|
||||
|
||||
define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v2i64'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v2i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz
|
||||
|
@ -169,6 +173,10 @@ define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
|
|||
}
|
||||
|
||||
define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v2i64u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v2i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctlz
|
||||
|
@ -201,6 +209,10 @@ define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v4i64'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v4i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctlz
|
||||
|
@ -230,6 +242,10 @@ define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v4i64u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v4i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctlz
|
||||
|
@ -259,6 +275,10 @@ define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
|
|||
}
|
||||
|
||||
define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i64'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctlz
|
||||
|
@ -292,6 +312,10 @@ define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) {
|
|||
}
|
||||
|
||||
define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i64u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %ctlz
|
||||
|
@ -325,6 +349,10 @@ define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v4i32'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v4i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz
|
||||
|
@ -357,6 +385,10 @@ define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
|
|||
}
|
||||
|
||||
define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v4i32u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v4i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctlz
|
||||
|
@ -389,6 +421,10 @@ define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i32'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctlz
|
||||
|
@ -418,6 +454,10 @@ define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i32u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctlz
|
||||
|
@ -447,6 +487,10 @@ define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
|
|||
}
|
||||
|
||||
define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i32'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz
|
||||
|
@ -480,6 +524,10 @@ define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) {
|
|||
}
|
||||
|
||||
define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i32u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ctlz
|
||||
|
@ -513,6 +561,10 @@ define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i16'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz
|
||||
|
@ -545,6 +597,10 @@ define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
|
|||
}
|
||||
|
||||
define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v8i16u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v8i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctlz
|
||||
|
@ -577,6 +633,10 @@ define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i16'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctlz
|
||||
|
@ -606,6 +666,10 @@ define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i16u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctlz
|
||||
|
@ -635,6 +699,10 @@ define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
|
|||
}
|
||||
|
||||
define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v32i16'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v32i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctlz
|
||||
|
@ -668,6 +736,10 @@ define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) {
|
|||
}
|
||||
|
||||
define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v32i16u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v32i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %ctlz
|
||||
|
@ -701,6 +773,10 @@ define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i8'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz
|
||||
|
@ -733,6 +809,10 @@ define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
|
|||
}
|
||||
|
||||
define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v16i8u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v16i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctlz
|
||||
|
@ -765,6 +845,10 @@ define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
|
|||
}
|
||||
|
||||
define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v32i8'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v32i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctlz
|
||||
|
@ -794,6 +878,10 @@ define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
|
|||
}
|
||||
|
||||
define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v32i8u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v32i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctlz
|
||||
|
@ -823,6 +911,10 @@ define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
|
|||
}
|
||||
|
||||
define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v64i8'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v64i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz
|
||||
|
@ -856,6 +948,10 @@ define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) {
|
|||
}
|
||||
|
||||
define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) {
|
||||
; NOLZCNT-LABEL: 'var_ctlz_v64i8u'
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true)
|
||||
; NOLZCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz
|
||||
;
|
||||
; SSE2-LABEL: 'var_ctlz_v64i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %ctlz
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,NOBMI
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=SSE2,BMI
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=NOBMI
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE2
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX1
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX2
|
||||
|
@ -32,7 +32,7 @@ define i64 @var_cttz_i64(i64 %a) {
|
|||
|
||||
define i64 @var_cttz_i64u(i64 %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_i64u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %cttz
|
||||
;
|
||||
; BMI-LABEL: 'var_cttz_i64u'
|
||||
|
@ -58,7 +58,7 @@ define i32 @var_cttz_i32(i32 %a) {
|
|||
|
||||
define i32 @var_cttz_i32u(i32 %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_i32u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %cttz
|
||||
;
|
||||
; BMI-LABEL: 'var_cttz_i32u'
|
||||
|
@ -84,7 +84,7 @@ define i16 @var_cttz_i16(i16 %a) {
|
|||
|
||||
define i16 @var_cttz_i16u(i16 %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_i16u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %cttz
|
||||
;
|
||||
; BMI-LABEL: 'var_cttz_i16u'
|
||||
|
@ -110,7 +110,7 @@ define i8 @var_cttz_i8(i8 %a) {
|
|||
|
||||
define i8 @var_cttz_i8u(i8 %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_i8u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %cttz
|
||||
;
|
||||
; BMI-LABEL: 'var_cttz_i8u'
|
||||
|
@ -139,6 +139,10 @@ declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
|
|||
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
|
||||
|
||||
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v2i64'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v2i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
|
||||
|
@ -164,6 +168,10 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
|
|||
}
|
||||
|
||||
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v2i64u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v2i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz
|
||||
|
@ -189,6 +197,10 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v4i64'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v4i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
|
||||
|
@ -214,6 +226,10 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v4i64u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v4i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz
|
||||
|
@ -239,6 +255,10 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
|
|||
}
|
||||
|
||||
define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i64'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
|
||||
|
@ -276,6 +296,10 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
|
|||
}
|
||||
|
||||
define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i64u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i64u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz
|
||||
|
@ -313,6 +337,10 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
|
|||
}
|
||||
|
||||
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v4i32'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v4i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
|
||||
|
@ -338,6 +366,10 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
|
|||
}
|
||||
|
||||
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v4i32u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v4i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz
|
||||
|
@ -363,6 +395,10 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i32'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
|
||||
|
@ -388,6 +424,10 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i32u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz
|
||||
|
@ -413,6 +453,10 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
|
|||
}
|
||||
|
||||
define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i32'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i32'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
|
||||
|
@ -450,6 +494,10 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
|
|||
}
|
||||
|
||||
define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i32u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i32u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz
|
||||
|
@ -487,6 +535,10 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
|
|||
}
|
||||
|
||||
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i16'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
|
||||
|
@ -512,6 +564,10 @@ define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
|
|||
}
|
||||
|
||||
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v8i16u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v8i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz
|
||||
|
@ -537,6 +593,10 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i16'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
|
||||
|
@ -562,6 +622,10 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i16u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz
|
||||
|
@ -587,6 +651,10 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
|
|||
}
|
||||
|
||||
define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v32i16'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v32i16'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
|
||||
|
@ -624,6 +692,10 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
|
|||
}
|
||||
|
||||
define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v32i16u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v32i16u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz
|
||||
|
@ -661,6 +733,10 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
|
|||
}
|
||||
|
||||
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i8'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
|
||||
|
@ -686,6 +762,10 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
|
|||
}
|
||||
|
||||
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v16i8u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v16i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz
|
||||
|
@ -711,6 +791,10 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
|
|||
}
|
||||
|
||||
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v32i8'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v32i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
|
||||
|
@ -736,6 +820,10 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
|
|||
}
|
||||
|
||||
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v32i8u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v32i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz
|
||||
|
@ -761,6 +849,10 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
|
|||
}
|
||||
|
||||
define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v64i8'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v64i8'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
|
||||
|
@ -798,6 +890,10 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
|
|||
}
|
||||
|
||||
define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
|
||||
; NOBMI-LABEL: 'var_cttz_v64i8u'
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
|
||||
; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
|
||||
;
|
||||
; SSE2-LABEL: 'var_cttz_v64i8u'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true)
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz
|
||||
|
|
|
@ -258,7 +258,7 @@ define void @cttz(i32 %a, <16 x i32> %va) {
|
|||
|
||||
define void @ctlz(i32 %a, <16 x i32> %va) {
|
||||
; THRU-LABEL: 'ctlz'
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
|
||||
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
|
|
|
@ -589,11 +589,47 @@ define void @ctlz_undef_4i64() #0 {
|
|||
}
|
||||
|
||||
define void @ctlz_undef_4i32() #0 {
|
||||
; CHECK-LABEL: @ctlz_undef_4i32(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; CHECK-NEXT: ret void
|
||||
; SSE-LABEL: @ctlz_undef_4i32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
; SSE-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
; SSE-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX1-LABEL: @ctlz_undef_4i32(
|
||||
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
|
||||
; AVX1-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
; AVX1-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
; AVX1-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
; AVX1-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
; AVX1-NEXT: ret void
|
||||
;
|
||||
; AVX2-LABEL: @ctlz_undef_4i32(
|
||||
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; AVX2-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; AVX2-NEXT: ret void
|
||||
;
|
||||
; AVX512-LABEL: @ctlz_undef_4i32(
|
||||
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; AVX512-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; AVX512-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
|
@ -612,19 +648,70 @@ define void @ctlz_undef_4i32() #0 {
|
|||
|
||||
define void @ctlz_undef_8i32() #0 {
|
||||
; SSE-LABEL: @ctlz_undef_8i32(
|
||||
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; SSE-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP3]], i1 true)
|
||||
; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
|
||||
; SSE-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
|
||||
; SSE-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @ctlz_undef_8i32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
; AVX1-LABEL: @ctlz_undef_8i32(
|
||||
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
; AVX1-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
; AVX1-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
; AVX1-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
; AVX1-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
|
||||
; AVX1-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
|
||||
; AVX1-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
; AVX1-NEXT: store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
; AVX1-NEXT: ret void
|
||||
;
|
||||
; AVX2-LABEL: @ctlz_undef_8i32(
|
||||
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX2-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX2-NEXT: ret void
|
||||
;
|
||||
; AVX512-LABEL: @ctlz_undef_8i32(
|
||||
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX512-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX512-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
|
|
|
@ -571,32 +571,47 @@ define void @cttz_undef_4i64() #0 {
|
|||
}
|
||||
|
||||
define void @cttz_undef_4i32() #0 {
|
||||
; SSE2-LABEL: @cttz_undef_4i32(
|
||||
; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE2-NEXT: ret void
|
||||
; SSE-LABEL: @cttz_undef_4i32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; SSE42-LABEL: @cttz_undef_4i32(
|
||||
; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; SSE42-NEXT: ret void
|
||||
; AVX1-LABEL: @cttz_undef_4i32(
|
||||
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
|
||||
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
|
||||
; AVX1-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; AVX1-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
|
||||
; AVX1-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
|
||||
; AVX1-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
|
||||
; AVX1-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
|
||||
; AVX1-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @cttz_undef_4i32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; AVX-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; AVX-NEXT: ret void
|
||||
; AVX2-LABEL: @cttz_undef_4i32(
|
||||
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; AVX2-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; AVX2-NEXT: ret void
|
||||
;
|
||||
; AVX512-LABEL: @cttz_undef_4i32(
|
||||
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
|
||||
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; AVX512-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
|
||||
; AVX512-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
|
||||
|
@ -614,47 +629,71 @@ define void @cttz_undef_4i32() #0 {
|
|||
}
|
||||
|
||||
define void @cttz_undef_8i32() #0 {
|
||||
; SSE2-LABEL: @cttz_undef_8i32(
|
||||
; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
; SSE2-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
; SSE2-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
; SSE2-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
; SSE2-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
|
||||
; SSE2-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
|
||||
; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
; SSE2-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
; SSE2-NEXT: ret void
|
||||
; SSE-LABEL: @cttz_undef_8i32(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
|
||||
; SSE-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
|
||||
; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
; SSE-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; SSE42-LABEL: @cttz_undef_8i32(
|
||||
; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
|
||||
; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
|
||||
; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
|
||||
; SSE42-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE42-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP3]], i1 true)
|
||||
; SSE42-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
|
||||
; SSE42-NEXT: ret void
|
||||
; AVX1-LABEL: @cttz_undef_8i32(
|
||||
; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
; AVX1-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
|
||||
; AVX1-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
|
||||
; AVX1-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
|
||||
; AVX1-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
|
||||
; AVX1-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
|
||||
; AVX1-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
|
||||
; AVX1-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
|
||||
; AVX1-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
|
||||
; AVX1-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
|
||||
; AVX1-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
|
||||
; AVX1-NEXT: ret void
|
||||
;
|
||||
; AVX-LABEL: @cttz_undef_8i32(
|
||||
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX-NEXT: ret void
|
||||
; AVX2-LABEL: @cttz_undef_8i32(
|
||||
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX2-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX2-NEXT: ret void
|
||||
;
|
||||
; AVX512-LABEL: @cttz_undef_8i32(
|
||||
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
|
||||
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
|
||||
; AVX512-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
|
||||
; AVX512-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
|
||||
%ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
|
||||
|
|
Loading…
Reference in New Issue