forked from OSchip/llvm-project
[X86] int_x86_avx2_permps and X86ISD::VPERMV should take an integer vector for their shuffle indices.
llvm-svn: 254269
This commit is contained in:
parent
5237b3991d
commit
ecae476e4c
|
@ -2431,7 +2431,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
|
||||
|
|
|
@ -10539,9 +10539,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
|||
if (Subtarget->hasAVX2())
|
||||
return DAG.getNode(
|
||||
X86ISD::VPERMV, DL, MVT::v8f32,
|
||||
DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL,
|
||||
MVT::v8i32, VPermMask)),
|
||||
V1);
|
||||
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
|
||||
|
||||
// Otherwise, fall back.
|
||||
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
|
||||
|
|
|
@ -376,7 +376,11 @@ def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>;
|
|||
|
||||
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
|
||||
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
|
||||
def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
|
||||
def X86VPermv : SDNode<"X86ISD::VPERMV",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
|
||||
SDTCisSameNumEltsAs<0,1>,
|
||||
SDTCisSameSizeAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
|
||||
def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>,
|
||||
|
|
|
@ -668,15 +668,15 @@ define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
|
|||
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
|
||||
|
||||
|
||||
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
|
||||
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
|
||||
; Check that the arguments are swapped between the intrinsic definition
|
||||
; and its lowering. Indeed, the offsets are the first source in
|
||||
; the instruction.
|
||||
; CHECK: vpermps %ymm0, %ymm1, %ymm0
|
||||
%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
|
||||
%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
|
||||
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
|
|
|
@ -455,14 +455,14 @@ define <4 x double> @stack_fold_permpd(<4 x double> %a0) {
|
|||
ret <4 x double> %3
|
||||
}
|
||||
|
||||
define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
|
||||
define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
|
||||
;CHECK-LABEL: stack_fold_permps
|
||||
;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
|
||||
%2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
|
||||
ret <8 x float> %2
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
|
||||
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
|
||||
|
||||
define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
|
||||
;CHECK-LABEL: stack_fold_permq
|
||||
|
|
Loading…
Reference in New Issue