forked from OSchip/llvm-project
[X86] Don't zero the upper bits of the k-register before extracting a single bit from a vXi1.
This doesn't match the semantics of the extract_vector_elt operation. Nothing downstream knows the bits were zeroed so they still get masked or sign extended after the extract anyway. llvm-svn: 320723
This commit is contained in:
parent
708c9f5947
commit
600f1ba333
|
@ -14528,12 +14528,9 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// Use kshiftlw/rw instruction.
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
unsigned MaxShift = VecVT.getVectorNumElements() - 1;
|
||||
if (MaxShift - IdxVal)
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
|
||||
DAG.getConstant(MaxShift - IdxVal, dl, MVT::i8));
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
|
||||
DAG.getConstant(MaxShift, dl, MVT::i8));
|
||||
if (IdxVal != 0)
|
||||
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
|
|
@ -158,8 +158,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
|||
; KNL-LABEL: test11:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $4, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: je LBB10_2
|
||||
|
@ -173,8 +172,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
|||
; SKX-LABEL: test11:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $11, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $4, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: je LBB10_2
|
||||
|
@ -198,8 +196,6 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; KNL-LABEL: test12:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: cmoveq %rsi, %rdi
|
||||
|
@ -210,8 +206,6 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; SKX-LABEL: test12:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
|
||||
; SKX-NEXT: kshiftlb $7, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $7, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: cmoveq %rsi, %rdi
|
||||
|
@ -264,8 +258,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; KNL-LABEL: test14:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $4, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testb $1, %al
|
||||
; KNL-NEXT: cmoveq %rsi, %rdi
|
||||
|
@ -276,8 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; SKX-LABEL: test14:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; SKX-NEXT: kshiftlb $3, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $7, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $4, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: cmoveq %rsi, %rdi
|
||||
|
@ -976,8 +968,6 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; SKX-LABEL: test_extractelement_v2i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: movb $4, %cl
|
||||
|
@ -1007,8 +997,6 @@ define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) {
|
|||
; SKX-LABEL: extractelement_v2i1_alt:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: movb $4, %cl
|
||||
|
@ -1036,8 +1024,7 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) {
|
|||
; SKX-LABEL: test_extractelement_v4i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $12, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $3, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: retq
|
||||
|
@ -1062,8 +1049,7 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) {
|
|||
; SKX-LABEL: test_extractelement_v32i1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
|
||||
; SKX-NEXT: kshiftld $29, %k0, %k0
|
||||
; SKX-NEXT: kshiftrd $31, %k0, %k0
|
||||
; SKX-NEXT: kshiftrd $2, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
|
|
|
@ -3004,18 +3004,12 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8
|
|||
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_vextractf32x4:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovd %edi, %xmm2
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlw $13, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $15, %k2, %k2
|
||||
; CHECK-NEXT: kshiftlw $15, %k0, %k3
|
||||
; CHECK-NEXT: kshiftrw $15, %k3, %k3
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $3, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $2, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: kmovw %k3, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm2
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
||||
; CHECK-NEXT: kmovw %k2, %eax
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
||||
|
@ -3035,18 +3029,12 @@ define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask)
|
|||
; CHECK-LABEL: test_mask_vextracti64x4:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
|
||||
; CHECK-NEXT: vmovd %edi, %xmm2
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlw $13, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $15, %k2, %k2
|
||||
; CHECK-NEXT: kshiftlw $15, %k0, %k3
|
||||
; CHECK-NEXT: kshiftrw $15, %k3, %k3
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $3, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $2, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: kmovw %k3, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm2
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
|
||||
; CHECK-NEXT: kmovw %k2, %eax
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
||||
|
@ -3065,18 +3053,12 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i
|
|||
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
|
||||
; CHECK-LABEL: test_maskz_vextracti32x4:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovd %edi, %xmm1
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlw $13, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $15, %k2, %k2
|
||||
; CHECK-NEXT: kshiftlw $15, %k0, %k3
|
||||
; CHECK-NEXT: kshiftrw $15, %k3, %k3
|
||||
; CHECK-NEXT: kshiftlw $14, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $3, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrw $2, %k0, %k2
|
||||
; CHECK-NEXT: kshiftrw $1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: kmovw %k3, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm1
|
||||
; CHECK-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; CHECK-NEXT: kmovw %k2, %eax
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||
|
|
|
@ -321,8 +321,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; KNL-LABEL: zext_test1:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $5, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
|
@ -331,8 +330,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test1:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: vzeroupper
|
||||
|
@ -341,8 +339,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512BW-LABEL: zext_test1:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: andl $1, %eax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
|
@ -351,8 +348,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512DQ-LABEL: zext_test1:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: andl $1, %eax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
|
@ -367,8 +363,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; KNL-LABEL: zext_test2:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $5, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
|
@ -378,8 +373,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test2:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
|
@ -389,8 +383,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512BW-LABEL: zext_test2:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: andl $1, %eax
|
||||
; AVX512BW-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
|
@ -400,8 +393,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512DQ-LABEL: zext_test2:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: andl $1, %eax
|
||||
; AVX512DQ-NEXT: ## kill: def %ax killed %ax killed %eax
|
||||
|
@ -417,8 +409,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; KNL-LABEL: zext_test3:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $5, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andb $1, %al
|
||||
; KNL-NEXT: ## kill: def %al killed %al killed %eax
|
||||
|
@ -428,8 +419,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test3:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: andb $1, %al
|
||||
; SKX-NEXT: ## kill: def %al killed %al killed %eax
|
||||
|
@ -439,8 +429,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512BW-LABEL: zext_test3:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512BW-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: andb $1, %al
|
||||
; AVX512BW-NEXT: ## kill: def %al killed %al killed %eax
|
||||
|
@ -450,8 +439,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; AVX512DQ-LABEL: zext_test3:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: andb $1, %al
|
||||
; AVX512DQ-NEXT: ## kill: def %al killed %al killed %eax
|
||||
|
@ -1151,10 +1139,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
|||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: kmovw %edi, %k0
|
||||
; KNL-NEXT: kmovw %esi, %k1
|
||||
; KNL-NEXT: kshiftlw $7, %k1, %k2
|
||||
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||
; KNL-NEXT: kshiftlw $6, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $8, %k1, %k2
|
||||
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $6, %k0, %k3
|
||||
; KNL-NEXT: kxorw %k1, %k3, %k1
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
||||
|
@ -1175,10 +1161,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
|||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: kmovd %edi, %k0
|
||||
; SKX-NEXT: kmovd %esi, %k1
|
||||
; SKX-NEXT: kshiftlw $7, %k1, %k2
|
||||
; SKX-NEXT: kshiftrw $15, %k2, %k2
|
||||
; SKX-NEXT: kshiftlw $6, %k1, %k1
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k2
|
||||
; SKX-NEXT: kshiftrw $9, %k1, %k1
|
||||
; SKX-NEXT: kshiftrb $6, %k0, %k3
|
||||
; SKX-NEXT: kxorb %k1, %k3, %k1
|
||||
; SKX-NEXT: kshiftlb $7, %k1, %k1
|
||||
|
@ -1195,10 +1179,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
|||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: kmovd %edi, %k0
|
||||
; AVX512BW-NEXT: kmovd %esi, %k1
|
||||
; AVX512BW-NEXT: kshiftlw $7, %k1, %k2
|
||||
; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
|
||||
; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
|
||||
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512BW-NEXT: kshiftrw $8, %k1, %k2
|
||||
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
|
||||
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
|
||||
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
|
||||
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
|
||||
|
@ -1218,10 +1200,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
|||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: kmovw %edi, %k0
|
||||
; AVX512DQ-NEXT: kmovw %esi, %k1
|
||||
; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
|
||||
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
|
||||
; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1
|
||||
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
|
||||
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
|
||||
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
|
||||
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
|
||||
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
|
||||
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
|
||||
|
|
|
@ -6948,8 +6948,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; GENERIC-LABEL: zext_test1:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
|
@ -6958,8 +6957,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test1:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
|
||||
; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
|
||||
; SKX-NEXT: vzeroupper # sched: [4:1.00]
|
||||
|
@ -6974,8 +6972,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; GENERIC-LABEL: zext_test2:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -6985,8 +6982,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test2:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
|
||||
; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
|
||||
; SKX-NEXT: # kill: def %ax killed %ax killed %eax
|
||||
|
@ -7002,8 +6998,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; GENERIC-LABEL: zext_test3:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
|
||||
; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
|
||||
; GENERIC-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -7013,8 +7008,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
|||
; SKX-LABEL: zext_test3:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
|
||||
; SKX-NEXT: andb $1, %al # sched: [1:0.25]
|
||||
; SKX-NEXT: # kill: def %al killed %al killed %eax
|
||||
|
@ -7392,10 +7386,8 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: kshiftlw $7, %k1, %k2 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftlw $6, %k1, %k1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $15, %k1, %k1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $8, %k1, %k2 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrw $9, %k1, %k1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00]
|
||||
|
@ -7412,10 +7404,8 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
|
|||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
|
||||
; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
|
||||
; SKX-NEXT: kshiftlw $7, %k1, %k2 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftlw $6, %k1, %k1 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $8, %k1, %k2 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrw $9, %k1, %k1 # sched: [3:1.00]
|
||||
; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00]
|
||||
; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
|
||||
; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00]
|
||||
|
|
|
@ -7,14 +7,10 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0,
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vmovd %edi, %xmm2
|
||||
; CHECK-NEXT: kmovw %edi, %k0
|
||||
; CHECK-NEXT: kshiftlb $7, %k0, %k1
|
||||
; CHECK-NEXT: kshiftrb $7, %k1, %k1
|
||||
; CHECK-NEXT: kshiftlb $6, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $7, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrb $1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: kmovw %k1, %ecx
|
||||
; CHECK-NEXT: vmovd %ecx, %xmm2
|
||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1684,105 +1684,89 @@ declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <
|
|||
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
|
||||
; KNL_64-LABEL: test30:
|
||||
; KNL_64: # %bb.0:
|
||||
; KNL_64-NEXT: kmovw %edx, %k0
|
||||
; KNL_64-NEXT: kmovw %esi, %k2
|
||||
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; KNL_64-NEXT: testb $1, %dil
|
||||
; KNL_64-NEXT: # implicit-def: %xmm0
|
||||
; KNL_64-NEXT: je .LBB31_2
|
||||
; KNL_64-NEXT: # %bb.1: # %cond.load
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_64-NEXT: .LBB31_2: # %else
|
||||
; KNL_64-NEXT: kmovw %edi, %k1
|
||||
; KNL_64-NEXT: kshiftlw $15, %k2, %k2
|
||||
; KNL_64-NEXT: kshiftrw $15, %k2, %k2
|
||||
; KNL_64-NEXT: kmovw %k2, %eax
|
||||
; KNL_64-NEXT: testb $1, %al
|
||||
; KNL_64-NEXT: je .LBB31_4
|
||||
; KNL_64-NEXT: # %bb.3: # %cond.load1
|
||||
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: jne .LBB31_1
|
||||
; KNL_64-NEXT: # %bb.2: # %else
|
||||
; KNL_64-NEXT: testb $1, %sil
|
||||
; KNL_64-NEXT: jne .LBB31_3
|
||||
; KNL_64-NEXT: .LBB31_4: # %else2
|
||||
; KNL_64-NEXT: kshiftlw $15, %k0, %k0
|
||||
; KNL_64-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL_64-NEXT: kmovw %k0, %eax
|
||||
; KNL_64-NEXT: testb $1, %al
|
||||
; KNL_64-NEXT: je .LBB31_6
|
||||
; KNL_64-NEXT: # %bb.5: # %cond.load4
|
||||
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: testb $1, %dl
|
||||
; KNL_64-NEXT: jne .LBB31_5
|
||||
; KNL_64-NEXT: .LBB31_6: # %else5
|
||||
; KNL_64-NEXT: kmovw %k2, %eax
|
||||
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL_64-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL_64-NEXT: kmovw %k1, %ecx
|
||||
; KNL_64-NEXT: vmovd %ecx, %xmm1
|
||||
; KNL_64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: kmovw %k0, %eax
|
||||
; KNL_64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vmovd %edi, %xmm1
|
||||
; KNL_64-NEXT: vpinsrb $4, %esi, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; KNL_64-NEXT: vzeroupper
|
||||
; KNL_64-NEXT: retq
|
||||
; KNL_64-NEXT: .LBB31_1: # %cond.load
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_64-NEXT: testb $1, %sil
|
||||
; KNL_64-NEXT: je .LBB31_4
|
||||
; KNL_64-NEXT: .LBB31_3: # %cond.load1
|
||||
; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: testb $1, %dl
|
||||
; KNL_64-NEXT: je .LBB31_6
|
||||
; KNL_64-NEXT: .LBB31_5: # %cond.load4
|
||||
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: jmp .LBB31_6
|
||||
;
|
||||
; KNL_32-LABEL: test30:
|
||||
; KNL_32: # %bb.0:
|
||||
; KNL_32-NEXT: pushl %esi
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: .cfi_offset %esi, -8
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: kmovw %eax, %k0
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: kmovw %eax, %k2
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: testb $1, %dl
|
||||
; KNL_32-NEXT: # implicit-def: %xmm0
|
||||
; KNL_32-NEXT: je .LBB31_2
|
||||
; KNL_32-NEXT: # %bb.1: # %cond.load
|
||||
; KNL_32-NEXT: vmovd %xmm1, %ecx
|
||||
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_32-NEXT: .LBB31_2: # %else
|
||||
; KNL_32-NEXT: kmovw %eax, %k1
|
||||
; KNL_32-NEXT: kshiftlw $15, %k2, %k2
|
||||
; KNL_32-NEXT: kshiftrw $15, %k2, %k2
|
||||
; KNL_32-NEXT: kmovw %k2, %eax
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: je .LBB31_4
|
||||
; KNL_32-NEXT: # %bb.3: # %cond.load1
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: jne .LBB31_1
|
||||
; KNL_32-NEXT: # %bb.2: # %else
|
||||
; KNL_32-NEXT: testb $1, %cl
|
||||
; KNL_32-NEXT: jne .LBB31_3
|
||||
; KNL_32-NEXT: .LBB31_4: # %else2
|
||||
; KNL_32-NEXT: kshiftlw $15, %k0, %k0
|
||||
; KNL_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL_32-NEXT: kmovw %k0, %eax
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: je .LBB31_6
|
||||
; KNL_32-NEXT: # %bb.5: # %cond.load4
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: jne .LBB31_5
|
||||
; KNL_32-NEXT: .LBB31_6: # %else5
|
||||
; KNL_32-NEXT: kmovw %k2, %eax
|
||||
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
|
||||
; KNL_32-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL_32-NEXT: kmovw %k1, %ecx
|
||||
; KNL_32-NEXT: vmovd %ecx, %xmm1
|
||||
; KNL_32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: kmovw %k0, %eax
|
||||
; KNL_32-NEXT: vmovd %edx, %xmm1
|
||||
; KNL_32-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; KNL_32-NEXT: popl %esi
|
||||
; KNL_32-NEXT: retl
|
||||
; KNL_32-NEXT: .LBB31_1: # %cond.load
|
||||
; KNL_32-NEXT: vmovd %xmm1, %esi
|
||||
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_32-NEXT: testb $1, %cl
|
||||
; KNL_32-NEXT: je .LBB31_4
|
||||
; KNL_32-NEXT: .LBB31_3: # %cond.load1
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm1, %esi
|
||||
; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: je .LBB31_6
|
||||
; KNL_32-NEXT: .LBB31_5: # %cond.load4
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm1, %esi
|
||||
; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: jmp .LBB31_6
|
||||
;
|
||||
; SKX-LABEL: test30:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; SKX-NEXT: kshiftlw $15, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: kmovw %k1, %eax
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
|
@ -1793,8 +1777,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SKX-NEXT: .LBB31_2: # %else
|
||||
; SKX-NEXT: kshiftlw $14, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $1, %k1, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: je .LBB31_4
|
||||
|
@ -1802,8 +1785,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; SKX-NEXT: .LBB31_4: # %else2
|
||||
; SKX-NEXT: kshiftlw $13, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $2, %k1, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: je .LBB31_6
|
||||
|
@ -1823,9 +1805,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX_32-NEXT: .cfi_def_cfa_offset 16
|
||||
; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
|
||||
; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
|
||||
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: kmovw %k1, %eax
|
||||
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
|
@ -1835,8 +1815,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX_32-NEXT: vmovd %xmm2, %eax
|
||||
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SKX_32-NEXT: .LBB31_2: # %else
|
||||
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: kshiftrw $1, %k1, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
; SKX_32-NEXT: je .LBB31_4
|
||||
|
@ -1845,8 +1824,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
|
||||
; SKX_32-NEXT: .LBB31_4: # %else2
|
||||
; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
|
||||
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: kshiftrw $2, %k1, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: testb $1, %al
|
||||
; SKX_32-NEXT: je .LBB31_6
|
||||
|
|
|
@ -41,40 +41,34 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftrw $2, %k0, %k1
|
||||
; SKX-NEXT: kshiftlw $15, %k1, %k2
|
||||
; SKX-NEXT: kshiftrw $15, %k2, %k2
|
||||
; SKX-NEXT: kshiftrw $1, %k1, %k2
|
||||
; SKX-NEXT: kmovd %k2, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fld1
|
||||
; SKX-NEXT: fldz
|
||||
; SKX-NEXT: fld %st(0)
|
||||
; SKX-NEXT: fcmovne %st(2), %st(0)
|
||||
; SKX-NEXT: kshiftlw $14, %k1, %k1
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
||||
; SKX-NEXT: kmovd %k1, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fld %st(1)
|
||||
; SKX-NEXT: fcmovne %st(3), %st(0)
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k1
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
||||
; SKX-NEXT: kshiftrw $1, %k0, %k1
|
||||
; SKX-NEXT: kmovd %k1, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fld %st(2)
|
||||
; SKX-NEXT: fcmovne %st(4), %st(0)
|
||||
; SKX-NEXT: kshiftlw $14, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testb $1, %al
|
||||
; SKX-NEXT: fxch %st(3)
|
||||
; SKX-NEXT: fcmovne %st(4), %st(0)
|
||||
; SKX-NEXT: fstp %st(4)
|
||||
; SKX-NEXT: fxch %st(3)
|
||||
; SKX-NEXT: fstpt 10(%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt (%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt 30(%rdi)
|
||||
; SKX-NEXT: fstpt 10(%rdi)
|
||||
; SKX-NEXT: fxch %st(1)
|
||||
; SKX-NEXT: fstpt 20(%rdi)
|
||||
; SKX-NEXT: fstpt 30(%rdi)
|
||||
; SKX-NEXT: retq
|
||||
bb:
|
||||
%tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
|
||||
|
|
Loading…
Reference in New Issue