[X86][SSE] (Reapplied) Replace (V)PMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (llvm)

This patch removes the LLVM VPMOVSX and (V)PMOVZX sign/zero extension intrinsics and auto-upgrades them to generic SEXT/ZEXT IR instructions instead. We already did this for SSE41 PMOVSX some time ago, so much of that implementation can be reused.

Reapplied now that the companion patch (D20684), which removes/auto-upgrades the clang intrinsics, has been committed.

Differential Revision: http://reviews.llvm.org/D20686

llvm-svn: 271131
This commit is contained in:
Simon Pilgrim 2016-05-28 18:03:41 +00:00
parent 328b6d3903
commit 9602d678cb
17 changed files with 362 additions and 858 deletions

View File

@ -756,28 +756,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i32_ty], [IntrNoMem]>;
}
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Vector min element
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
@ -2866,46 +2844,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
}
// Vector sign and zero extend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,

View File

@ -178,6 +178,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
Name.startswith("x86.sse41.pmovsx") ||
Name.startswith("x86.sse41.pmovzx") ||
Name.startswith("x86.avx2.pmovsx") ||
Name.startswith("x86.avx2.pmovzx") ||
Name == "x86.sse2.cvtdq2pd" ||
Name == "x86.sse2.cvtps2pd" ||
Name == "x86.avx.cvtdq2.pd.256" ||
@ -544,19 +547,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
} else if (Name.startswith("llvm.x86.sse41.pmovsx") ||
Name.startswith("llvm.x86.sse41.pmovzx") ||
Name.startswith("llvm.x86.avx2.pmovsx") ||
Name.startswith("llvm.x86.avx2.pmovzx")) {
VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign extend.
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<int, 8> ShuffleMask;
for (int i = 0; i != (int)NumDstElts; ++i)
ShuffleMask.push_back(i);
Value *SV = Builder.CreateShuffleVector(
CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
Rep = Builder.CreateSExt(SV, DstTy);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
} else if (Name == "llvm.x86.avx2.vbroadcasti128") {
// Replace vbroadcasts with a vector shuffle.
Type *VT = VectorType::get(Type::getInt64Ty(C), 2);

View File

@ -361,18 +361,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
@ -2288,12 +2276,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxdq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),

View File

@ -325,24 +325,6 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
/// Build generic IR for an x86 vector extension intrinsic call.
///
/// Takes the leading lanes of the call's first operand, as many as the
/// result vector has elements, and widens them to the call's result type.
///
/// \param II         The intrinsic call; operand 0 is the source vector and
///                   the call's own type is the destination vector type.
/// \param Builder    IR builder used to create the shuffle and the extend.
/// \param SignExtend If true a sext is created, otherwise a zext.
/// \returns The value produced by the created sext/zext.
static Value *simplifyX86extend(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder,
bool SignExtend) {
// Both the operand and the result are cast<> to VectorType, so they are
// expected to be vectors here.
VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(II.getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
// The mask is the identity sequence 0, 1, ..., NumDstElts-1.
SmallVector<int, 8> ShuffleMask;
for (int i = 0; i != (int)NumDstElts; ++i)
ShuffleMask.push_back(i);
// The second shuffle operand is undef; every mask index is below NumDstElts.
Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0),
UndefValue::get(SrcTy), ShuffleMask);
return SignExtend ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
}
static Value *simplifyX86insertps(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@ -1649,32 +1631,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::x86_avx2_pmovsxbd:
case Intrinsic::x86_avx2_pmovsxbq:
case Intrinsic::x86_avx2_pmovsxbw:
case Intrinsic::x86_avx2_pmovsxdq:
case Intrinsic::x86_avx2_pmovsxwd:
case Intrinsic::x86_avx2_pmovsxwq:
if (Value *V = simplifyX86extend(*II, *Builder, true))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_sse41_pmovzxbd:
case Intrinsic::x86_sse41_pmovzxbq:
case Intrinsic::x86_sse41_pmovzxbw:
case Intrinsic::x86_sse41_pmovzxdq:
case Intrinsic::x86_sse41_pmovzxwd:
case Intrinsic::x86_sse41_pmovzxwq:
case Intrinsic::x86_avx2_pmovzxbd:
case Intrinsic::x86_avx2_pmovzxbq:
case Intrinsic::x86_avx2_pmovzxbw:
case Intrinsic::x86_avx2_pmovzxdq:
case Intrinsic::x86_avx2_pmovzxwd:
case Intrinsic::x86_avx2_pmovzxwq:
if (Value *V = simplifyX86extend(*II, *Builder, false))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_sse41_insertps:
if (Value *V = simplifyX86insertps(*II, *Builder))
return replaceInstUsesWith(*II, V);

View File

@ -247,6 +247,72 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: ## BB#0:

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL
@ -1800,102 +1800,6 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbd:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbq:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbw:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxdq:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwd:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwq:
; AVX: ## BB#0:
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retl
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_sse41_pmuldq:
; AVX: ## BB#0:
@ -4126,7 +4030,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb LCPI225_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@ -4367,7 +4271,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0
; AVX512VL-NEXT: vpermilpd LCPI239_0, %ymm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@ -4859,7 +4763,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@ -4867,7 +4771,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX512VL-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>

View File

@ -740,11 +740,10 @@ define <4 x i64> @test_mm256_cvtepi8_epi16(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %arg0)
%res = bitcast <16 x i16> %call to <4 x i64>
%ext = sext <16 x i8> %arg0 to <16 x i16>
%res = bitcast <16 x i16> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi8_epi32:
@ -757,11 +756,11 @@ define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbd %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %arg0)
%res = bitcast <8 x i32> %call to <4 x i64>
%shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%ext = sext <8 x i8> %shuf to <8 x i32>
%res = bitcast <8 x i32> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi8_epi64:
@ -774,10 +773,10 @@ define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %arg0)
ret <4 x i64> %call
%shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = sext <4 x i8> %shuf to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi16_epi32:
@ -790,11 +789,10 @@ define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%call = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %arg0)
%res = bitcast <8 x i32> %call to <4 x i64>
%ext = sext <8 x i16> %arg0 to <8 x i32>
%res = bitcast <8 x i32> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi16_epi64:
@ -807,10 +805,10 @@ define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxwq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%call = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %arg0)
ret <4 x i64> %call
%shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = sext <4 x i16> %shuf to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi32_epi64:
@ -823,10 +821,9 @@ define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxdq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %arg0)
ret <4 x i64> %res
%ext = sext <4 x i32> %arg0 to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi16:
@ -839,11 +836,10 @@ define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %arg0)
%res = bitcast <16 x i16> %call to <4 x i64>
%ext = zext <16 x i8> %arg0 to <16 x i16>
%res = bitcast <16 x i16> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi32:
@ -856,11 +852,11 @@ define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %arg0)
%res = bitcast <8 x i32> %call to <4 x i64>
%shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%ext = zext <8 x i8> %shuf to <8 x i32>
%res = bitcast <8 x i32> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi64:
@ -873,10 +869,10 @@ define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%call = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %arg0)
ret <4 x i64> %call
%shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = zext <4 x i8> %shuf to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu16_epi32:
@ -889,11 +885,10 @@ define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%call = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %arg0)
%res = bitcast <8 x i32> %call to <4 x i64>
%ext = zext <8 x i16> %arg0 to <8 x i32>
%res = bitcast <8 x i32> %ext to <4 x i64>
ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu16_epi64:
@ -906,10 +901,10 @@ define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%call = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %arg0)
ret <4 x i64> %call
%shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = zext <4 x i16> %shuf to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu32_epi64:
@ -922,10 +917,9 @@ define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %arg0)
ret <4 x i64> %res
%ext = zext <4 x i32> %arg0 to <4 x i64>
ret <4 x i64> %ext
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
define <2 x i64> @test_mm256_extracti128_si256(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extracti128_si256:

View File

@ -203,3 +203,99 @@ define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; CHECK: vpmovsxbd
%res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; CHECK: vpmovsxbq
%res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; CHECK: vpmovsxbw
%res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; CHECK: vpmovsxdq
%res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; CHECK: vpmovsxwd
%res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; CHECK: vpmovsxwq
%res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; CHECK: vpmovzxbd
%res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; CHECK: vpmovzxbq
%res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; CHECK: vpmovzxbw
%res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; CHECK: vpmovzxdq
%res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; CHECK: vpmovzxwd
%res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; CHECK: vpmovzxwq
%res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone

View File

@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=AVX512VL
@ -1078,198 +1077,6 @@ define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxbd on an xmm argument is expected to become
; a single vpmovsxbd %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxbd:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxbd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxbq on an xmm argument is expected to become
; a single vpmovsxbq %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxbq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxbq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxbw on an xmm argument is expected to become
; a single vpmovsxbw %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxbw:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxbw:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxdq on an xmm argument is expected to become
; a single vpmovsxdq %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxdq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxdq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxwd on an xmm argument is expected to become
; a single vpmovsxwd %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxwd:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxwd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovsxwq on an xmm argument is expected to become
; a single vpmovsxwq %xmm0, %ymm0 followed by retl, under both the avx2 and the
; avx512vl run lines.
define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovsxwq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovsxwq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxbd is expected to become a single vpmovzxbd
; into ymm0 under both run lines. The expected asm comment lists xmm0 bytes 0-7,
; each followed by three zero bytes.
define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxbd:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxbd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxbq is expected to become a single vpmovzxbq
; into ymm0 under both run lines. The expected asm comment lists xmm0 bytes 0-3,
; each followed by seven zero bytes.
define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxbq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxbq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxbw is expected to become a single vpmovzxbw
; into ymm0 under both run lines. The expected asm comment lists xmm0 bytes 0-15,
; each followed by one zero byte.
define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxbw:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxbw:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512VL-NEXT: retl
%res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxdq is expected to become a single vpmovzxdq
; into ymm0 under both run lines. The expected asm comment lists xmm0 elements
; 0-3, each followed by one zero element.
define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxdq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxdq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxwd is expected to become a single vpmovzxwd
; into ymm0 under both run lines. The expected asm comment lists xmm0 elements
; 0-7, each followed by one zero element.
define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxwd:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxwd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.avx2.pmovzxwq is expected to become a single vpmovzxwq
; into ymm0 under both run lines. The expected asm comment lists xmm0 elements
; 0-3, each followed by three zero elements.
define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pmovzxwq:
; AVX2: ## BB#0:
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT: retl
;
; AVX512VL-LABEL: test_x86_avx2_pmovzxwq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512VL-NEXT: retl
%res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
%res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <4 x i64> %res
@ -1674,7 +1481,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx_storeu_dq_256:
; AVX2: ## BB#0:
; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX2-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0
; AVX2-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%eax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retl
@ -1682,7 +1489,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512VL-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>

View File

@ -1,10 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s
; Load-folding test: a <16 x i8> load from %a feeding a byte-to-word sign
; extension is expected to fold into one vpmovsxbw (%rdi), %ymm0.
; NOTE(review): this span carries two define/label pairs (test_lvm_... and
; test_llvm_...) and two definitions of %2 (intrinsic call and sext); it looks
; like before/after lines of one change shown together - confirm which is current.
define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbw
; CHECK: vpmovsxbw (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
%2 = sext <16 x i8> %1 to <16 x i16>
ret <16 x i16> %2
}
@ -12,23 +12,25 @@ define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
; CHECK: vpmovsxbd (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
ret <8 x i32> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = sext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}
; Load-folding test: a <16 x i8> load from %a feeding a byte-to-quadword sign
; extension is expected to fold into one vpmovsxbq (%rdi), %ymm0. The generic
; form extracts the low 4 bytes with shufflevector before the sext.
; NOTE(review): both the intrinsic call and the shufflevector+sext form define
; %2 here, and there are two ret lines; this looks like before/after lines of
; one change shown together - confirm which is current.
define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
; CHECK: vpmovsxbq (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
ret <4 x i64> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
; Load-folding test: a <8 x i16> load from %a feeding a word-to-dword sign
; extension is expected to fold into one vpmovsxwd (%rdi), %ymm0.
; NOTE(review): %2 is defined twice here (intrinsic call and sext); this looks
; like before/after lines of one change shown together - confirm which is current.
define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
; CHECK: vpmovsxwd (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
%2 = sext <8 x i16> %1 to <8 x i32>
ret <8 x i32> %2
}
@ -36,23 +38,24 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
; CHECK: vpmovsxwq (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
ret <4 x i64> %2
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
; Load-folding test: a <4 x i32> load from %a feeding a dword-to-quadword sign
; extension is expected to fold into one vpmovsxdq (%rdi), %ymm0.
; NOTE(review): %2 is defined twice here (intrinsic call and sext); this looks
; like before/after lines of one change shown together - confirm which is current.
define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
; CHECK: vpmovsxdq (%rdi), %ymm0
%1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
%2 = sext <4 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
; Load-folding test: a <16 x i8> load from %a feeding a byte-to-word zero
; extension is expected to fold into one vpmovzxbw (%rdi), %ymm0.
; NOTE(review): this span carries two define/label pairs (test_lvm_... and
; test_llvm_...) and two definitions of %2 (intrinsic call and zext); it looks
; like before/after lines of one change shown together - confirm which is current.
define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbw
; CHECK: vpmovzxbw (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
%2 = zext <16 x i8> %1 to <16 x i16>
ret <16 x i16> %2
}
@ -60,23 +63,25 @@ define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
; CHECK: vpmovzxbd (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
ret <8 x i32> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = zext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}
; Load-folding test: a <16 x i8> load from %a feeding a byte-to-quadword zero
; extension is expected to fold into one vpmovzxbq (%rdi), %ymm0. The generic
; form extracts the low 4 bytes with shufflevector before the zext.
; NOTE(review): both the intrinsic call and the shufflevector+zext form define
; %2 here, and there are two ret lines; this looks like before/after lines of
; one change shown together - confirm which is current.
define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
; CHECK: vpmovzxbq (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
ret <4 x i64> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
; Load-folding test: a <8 x i16> load from %a feeding a word-to-dword zero
; extension is expected to fold into one vpmovzxwd (%rdi), %ymm0.
; NOTE(review): %2 is defined twice here (intrinsic call and zext); this looks
; like before/after lines of one change shown together - confirm which is current.
define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
; CHECK: vpmovzxwd (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
%2 = zext <8 x i16> %1 to <8 x i32>
ret <8 x i32> %2
}
@ -84,27 +89,15 @@ define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
; CHECK: vpmovzxwq (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
ret <4 x i64> %2
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
; Load-folding test: a <4 x i32> load from %a feeding a dword-to-quadword zero
; extension is expected to fold into one vpmovzxdq (%rdi), %ymm0.
; NOTE(review): %2 is defined twice here (intrinsic call and zext); this looks
; like before/after lines of one change shown together - confirm which is current.
define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
; CHECK: vpmovzxdq (%rdi), %ymm0
%1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
%2 = zext <4 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)

View File

@ -301,11 +301,11 @@ define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
; X64-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%zext = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %arg0)
%res = bitcast <8 x i16> %zext to <2 x i64>
%ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%sext = zext <8 x i8> %ext0 to <8 x i16>
%res = bitcast <8 x i16> %sext to <2 x i64>
ret <2 x i64> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi32:
@ -318,11 +318,11 @@ define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%zext = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %arg0)
%res = bitcast <4 x i32> %zext to <2 x i64>
%ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%sext = zext <4 x i8> %ext0 to <4 x i32>
%res = bitcast <4 x i32> %sext to <2 x i64>
ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi64:
@ -335,10 +335,10 @@ define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%zext = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %arg0)
ret <2 x i64> %zext
%ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
%sext = zext <2 x i8> %ext0 to <2 x i64>
ret <2 x i64> %sext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi32:
@ -351,11 +351,11 @@ define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%zext = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %arg0)
%res = bitcast <4 x i32> %zext to <2 x i64>
%ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%sext = zext <4 x i16> %ext0 to <4 x i32>
%res = bitcast <4 x i32> %sext to <2 x i64>
ret <2 x i64> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi64:
@ -368,10 +368,10 @@ define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%zext = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %arg0)
ret <2 x i64> %zext
%ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
%sext = zext <2 x i16> %ext0 to <2 x i64>
ret <2 x i64> %sext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu32_epi64:
@ -384,10 +384,10 @@ define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%zext = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %arg0)
ret <2 x i64> %zext
%ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%sext = zext <2 x i32> %ext0 to <2 x i64>
ret <2 x i64> %sext
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_dp_pd:

View File

@ -145,3 +145,69 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxbd intrinsic; the expected output is a single
; pmovzxbd into xmm0 (xmm0 bytes 0-3, each followed by three zero bytes) and retl.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxbq intrinsic; the expected output is a single
; pmovzxbq into xmm0 (xmm0 bytes 0-1, each followed by seven zero bytes) and retl.
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxbw intrinsic; the expected output is a single
; pmovzxbw into xmm0 (xmm0 bytes 0-7, each followed by one zero byte) and retl.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxdq intrinsic; the expected output is a single
; pmovzxdq into xmm0 (xmm0 elements 0-1, each followed by one zero element) and
; retl.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxwd intrinsic; the expected output is a single
; pmovzxwd into xmm0 (xmm0 elements 0-3, each followed by one zero element) and
; retl.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
; Calls the llvm.x86.sse41.pmovzxwq intrinsic; the expected output is a single
; pmovzxwq into xmm0 (xmm0 elements 0-1, each followed by three zero elements)
; and retl.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK: ## BB#0:
; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

View File

@ -284,102 +284,6 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxbd is expected to become pmovzxbd for the
; sse4.1 prefix and the VEX-encoded vpmovzxbd for the knl prefix, each writing
; xmm0 and followed by retl.
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxbd:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxbd:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; KNL-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxbq is expected to become pmovzxbq for the
; sse4.1 prefix and the VEX-encoded vpmovzxbq for the knl prefix, each writing
; xmm0 and followed by retl.
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxbq:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxbq:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; KNL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxbw is expected to become pmovzxbw for the
; sse4.1 prefix and the VEX-encoded vpmovzxbw for the knl prefix, each writing
; xmm0 and followed by retl.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxbw:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxbw:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxdq is expected to become pmovzxdq for the
; sse4.1 prefix and the VEX-encoded vpmovzxdq for the knl prefix, each writing
; xmm0 and followed by retl.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxdq:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxdq:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; KNL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxwd is expected to become pmovzxwd for the
; sse4.1 prefix and the VEX-encoded vpmovzxwd for the knl prefix, each writing
; xmm0 and followed by retl.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxwd:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxwd:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; KNL-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
; Intrinsic test: llvm.x86.sse41.pmovzxwq is expected to become pmovzxwq for the
; sse4.1 prefix and the VEX-encoded vpmovzxwq for the knl prefix, each writing
; xmm0 and followed by retl.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; SSE41-LABEL: test_x86_sse41_pmovzxwq:
; SSE41: ## BB#0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retl
;
; KNL-LABEL: test_x86_sse41_pmovzxwq:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; KNL-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pmuldq:
; SSE41: ## BB#0:

View File

@ -109,8 +109,9 @@ define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
ret <8 x i16> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = zext <8 x i8> %2 to <8 x i16>
ret <8 x i16> %3
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
@ -124,8 +125,9 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
ret <4 x i32> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i8> %2 to <4 x i32>
ret <4 x i32> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
@ -139,8 +141,9 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
ret <2 x i64> %2
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
%3 = zext <2 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
@ -154,8 +157,9 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
ret <4 x i32> %2
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i16> %2 to <4 x i32>
ret <4 x i32> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
@ -169,8 +173,9 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
ret <2 x i64> %2
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
%3 = zext <2 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
@ -184,13 +189,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
ret <2 x i64> %2
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%3 = zext <2 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)

View File

@ -662,19 +662,19 @@ define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd
;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0)
ret <8 x i32> %2
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = sext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
; Stack-folding test for the 256-bit byte-to-quadword sign extension. The inline
; asm clobbers xmm1-xmm15 and flags (presumably to force %a0 to be spilled); the
; expected output is the extension reading its source from an (%rsp)-relative
; slot, annotated as a 16-byte folded reload. The pattern spells the VEX mnemonic
; vpmovsxbq, matching the v-prefixed patterns used by the sibling stack_fold tests.
; NOTE(review): both the intrinsic call and the shufflevector+sext form define
; %2 here, and there are two ret lines; this looks like before/after lines of
; one change shown together - confirm which is current.
define <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbq
;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0)
ret <4 x i64> %2
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
define <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbw
@ -704,64 +704,61 @@ define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwq
;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0)
ret <4 x i64> %2
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
; Stack-folding test for vpmovzxbd (byte -> dword zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp.
; NOTE(review): the body holds two sequences (intrinsic call + ret, then
; shufflevector/zext + ret) and assigns %2 twice; this looks like a diff shown
; without +/- markers - confirm which form the real test carries.
define <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbd
;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0)
ret <8 x i32> %2
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = zext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
; Stack-folding test for vpmovzxbq (byte -> qword zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp.
; NOTE(review): the body holds two sequences (intrinsic call + ret, then
; shufflevector/zext + ret) and assigns %2 twice; this looks like a diff shown
; without +/- markers - confirm which form the real test carries.
define <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbq
;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0)
ret <4 x i64> %2
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
; Stack-folding test for vpmovzxbw (byte -> word zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp. All 16 input bytes are extended, so no shufflevector is involved.
; NOTE(review): %2 is assigned twice below (intrinsic call, then a plain zext);
; this looks like a diff shown without +/- markers - confirm which form the
; real test carries.
define <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbw
;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0)
%2 = zext <16 x i8> %a0 to <16 x i16>
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
; Stack-folding test for vpmovzxdq (dword -> qword zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp. All four input dwords are extended, so no shufflevector is involved.
; NOTE(review): %2 is assigned twice below (intrinsic call, then a plain zext);
; this looks like a diff shown without +/- markers - confirm which form the
; real test carries.
define <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pmovzxdq
;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0)
%2 = zext <4 x i32> %a0 to <4 x i64>
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
; Stack-folding test for vpmovzxwd (word -> dword zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp. All eight input words are extended, so no shufflevector is involved.
; NOTE(review): %2 is assigned twice below (intrinsic call, then a plain zext);
; this looks like a diff shown without +/- markers - confirm which form the
; real test carries.
define <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovzxwd
;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0)
%2 = zext <8 x i16> %a0 to <8 x i32>
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
; Stack-folding test for vpmovzxwq (word -> qword zero extension, 256-bit result).
; The inline-asm nop names xmm1-xmm15 and flags as clobbers; the FileCheck
; pattern expects the extension to read its source from a 16-byte folded reload
; off %rsp.
; NOTE(review): the body holds two sequences (intrinsic call + ret, then
; shufflevector/zext + ret) and assigns %2 twice; this looks like a diff shown
; without +/- markers - confirm which form the real test carries.
define <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovzxwq
;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0)
ret <4 x i64> %2
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq

View File

@ -1,70 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
;
; Basic sign extension tests
;
; AVX2 pmovsxbd on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-7, then a sext from <8 x i8> to <8 x i32>.
define <8 x i32> @avx2_pmovsxbd(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxbd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %v)
ret <8 x i32> %res
}
; AVX2 pmovsxbq on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a sext from <4 x i8> to <4 x i64>.
define <4 x i64> @avx2_pmovsxbq(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxbq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %v)
ret <4 x i64> %res
}
; AVX2 pmovsxbw on a <16 x i8> input. Every input element is extended, so the
; expected instcombine output is a single sext to <16 x i16> with no shuffle.
define <16 x i16> @avx2_pmovsxbw(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxbw(
; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i8> %v to <16 x i16>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
%res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %v)
ret <16 x i16> %res
}
; AVX2 pmovsxdq on a <4 x i32> input. Every input element is extended, so the
; expected instcombine output is a single sext to <4 x i64> with no shuffle.
define <4 x i64> @avx2_pmovsxdq(<4 x i32> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxdq(
; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> %v to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %v)
ret <4 x i64> %res
}
; AVX2 pmovsxwd on a <8 x i16> input. Every input element is extended, so the
; expected instcombine output is a single sext to <8 x i32> with no shuffle.
define <8 x i32> @avx2_pmovsxwd(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxwd(
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> %v to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %v)
ret <8 x i32> %res
}
; AVX2 pmovsxwq on a <8 x i16> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a sext from <4 x i16> to <4 x i64>.
define <4 x i64> @avx2_pmovsxwq(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovsxwq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %v)
ret <4 x i64> %res
}

View File

@ -1,137 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
;
; Basic zero extension tests
;
; SSE4.1 pmovzxbd on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a zext from <4 x i8> to <4 x i32>.
define <4 x i32> @sse41_pmovzxbd(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxbd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %v)
ret <4 x i32> %res
}
; SSE4.1 pmovzxbq on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-1, then a zext from <2 x i8> to <2 x i64>.
define <2 x i64> @sse41_pmovzxbq(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxbq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %v)
ret <2 x i64> %res
}
; SSE4.1 pmovzxbw on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-7, then a zext from <8 x i8> to <8 x i16>.
define <8 x i16> @sse41_pmovzxbw(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxbw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %v)
ret <8 x i16> %res
}
; SSE4.1 pmovzxdq on a <4 x i32> input. Expected instcombine output: a
; shufflevector selecting elements 0-1, then a zext from <2 x i32> to <2 x i64>.
define <2 x i64> @sse41_pmovzxdq(<4 x i32> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxdq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %v)
ret <2 x i64> %res
}
; SSE4.1 pmovzxwd on a <8 x i16> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a zext from <4 x i16> to <4 x i32>.
define <4 x i32> @sse41_pmovzxwd(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxwd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %v)
ret <4 x i32> %res
}
; SSE4.1 pmovzxwq on a <8 x i16> input. Expected instcombine output: a
; shufflevector selecting elements 0-1, then a zext from <2 x i16> to <2 x i64>.
define <2 x i64> @sse41_pmovzxwq(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @sse41_pmovzxwq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %v)
ret <2 x i64> %res
}
; AVX2 pmovzxbd on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-7, then a zext from <8 x i8> to <8 x i32>.
define <8 x i32> @avx2_pmovzxbd(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxbd(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %v)
ret <8 x i32> %res
}
; AVX2 pmovzxbq on a <16 x i8> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a zext from <4 x i8> to <4 x i64>.
define <4 x i64> @avx2_pmovzxbq(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxbq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %v)
ret <4 x i64> %res
}
; AVX2 pmovzxbw on a <16 x i8> input. Every input element is extended, so the
; expected instcombine output is a single zext to <16 x i16> with no shuffle.
define <16 x i16> @avx2_pmovzxbw(<16 x i8> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxbw(
; CHECK-NEXT: [[TMP1:%.*]] = zext <16 x i8> %v to <16 x i16>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
%res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %v)
ret <16 x i16> %res
}
; AVX2 pmovzxdq on a <4 x i32> input. Every input element is extended, so the
; expected instcombine output is a single zext to <4 x i64> with no shuffle.
define <4 x i64> @avx2_pmovzxdq(<4 x i32> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxdq(
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> %v to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %v)
ret <4 x i64> %res
}
; AVX2 pmovzxwd on a <8 x i16> input. Every input element is extended, so the
; expected instcombine output is a single zext to <8 x i32> with no shuffle.
define <8 x i32> @avx2_pmovzxwd(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxwd(
; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i16> %v to <8 x i32>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %v)
ret <8 x i32> %res
}
; AVX2 pmovzxwq on a <8 x i16> input. Expected instcombine output: a
; shufflevector selecting elements 0-3, then a zext from <4 x i16> to <4 x i64>.
define <4 x i64> @avx2_pmovzxwq(<8 x i16> %v) nounwind readnone {
; CHECK-LABEL: @avx2_pmovzxwq(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
;
%res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %v)
ret <4 x i64> %res
}