[x86] Add test cases to demonstrate some dumb mask->gpr->mask transition sequences.

llvm-svn: 324693
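The new tests cover code that moves a compare mask into a GPR, zero-extends it, and moves it back into a k-register instead of staying in the mask domain (the kmovw/movzbl/kmovw runs in the CHECK lines below). A hedged C-level sketch of the kind of source that would produce the two bad_mask_transition tests, assuming _mm512_cmp_pd_mask with _CMP_LT_OQ (predicate 17 in the IR), _mm512_kunpackb, and _mm512_mask_blend_ps as the originating intrinsics (the commit does not name them):

#include <immintrin.h>

/* Hypothetical source for bad_mask_transition: two __mmask8 compare results
   are concatenated into a __mmask16 and used to blend.  The implicit
   __mmask8 -> __mmask16 widening is what currently round-trips through a GPR. */
__m512 bad_mask_transition(__m512d a, __m512d b, __m512d c, __m512d d,
                           __m512 e, __m512 f) {
  __mmask8 m0 = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
  __mmask8 m1 = _mm512_cmp_pd_mask(c, d, _CMP_LT_OQ);
  __mmask16 m = _mm512_kunpackb(m0, m1); /* concatenate the two 8-bit masks */
  return _mm512_mask_blend_ps(m, e, f);
}

/* Hypothetical source for bad_mask_transition_2: a single __mmask8 widened
   implicitly to the __mmask16 that the 16-lane blend expects. */
__m512 bad_mask_transition_2(__m512d a, __m512d b, __m512 e, __m512 f) {
  __mmask8 m = _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
  return _mm512_mask_blend_ps(m, e, f);
}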
Craig Topper 2018-02-09 01:14:17 +00:00
parent 892bb89bc9
commit 79c3255fe4
2 changed files with 112 additions and 0 deletions


@@ -4986,3 +4986,47 @@ define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1
declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) {
; CHECK-LABEL: bad_mask_transition:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmplt_oqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: vcmplt_oqpd %zmm3, %zmm2, %k0
; CHECK-NEXT: kmovw %k0, %ecx
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: kmovw %ecx, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k1
; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT: retq
entry:
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i8 -1, i32 4)
%1 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, i8 -1, i32 4)
%conv = zext i8 %0 to i16
%conv2 = zext i8 %1 to i16
%2 = bitcast i16 %conv to <16 x i1>
%3 = bitcast i16 %conv2 to <16 x i1>
%4 = shufflevector <16 x i1> %2, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%5 = shufflevector <16 x i1> %3, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = shufflevector <8 x i1> %4, <8 x i1> %5, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%7 = select <16 x i1> %6, <16 x float> %f, <16 x float> %e
ret <16 x float> %7
}
define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) {
; CHECK-LABEL: bad_mask_transition_2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmplt_oqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vblendmps %zmm5, %zmm4, %zmm0 {%k1}
; CHECK-NEXT: retq
entry:
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, i8 -1, i32 4)
%conv = zext i8 %0 to i16
%1 = bitcast i16 %conv to <16 x i1>
%2 = select <16 x i1> %1, <16 x float> %f, <16 x float> %e
ret <16 x float> %2
}


@@ -2747,3 +2747,71 @@ bb.1:
bb.2:
ret void
}
; This is derived from an intrinsic test where a v4i1 mask was created by _mm_cmp_epi32_mask and then passed to _mm512_mask_blend_epi32, which uses a v16i1 mask.
; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
; KNL-LABEL: mask_widening:
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: mask_widening:
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
; SKX-NEXT: vpmovd2m %zmm0, %k1
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask_widening:
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask_widening:
; AVX512DQ: ## %bb.0: ## %entry
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[8,9,10,11,12,13,14,15]
; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
entry:
%0 = bitcast <2 x i64> %a to <4 x i32>
%1 = bitcast <2 x i64> %b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i64> %f to <16 x i32>
%5 = bitcast <8 x i64> %e to <16 x i32>
%6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
%7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
%8 = bitcast <16 x i32> %7 to <8 x i64>
ret <8 x i64> %8
}
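Per the comment above the test, a hedged C sketch of the intrinsic pattern it was reduced from; the equality predicate and the unused c/d parameters (mirroring the IR signature) are assumptions:

#include <immintrin.h>

/* Hypothetical source for mask_widening: a 4-bit compare mask from a 128-bit
   compare is widened through the scalar domain to the 16-bit mask expected by
   the 512-bit blend.  Needs AVX512F plus AVX512VL for the 128-bit compare. */
__m512i mask_widening(__m128i a, __m128i b, __m128i c, __m128i d,
                      __m512i e, __m512i f) {
  __mmask8 m = _mm_cmp_epi32_mask(a, b, _MM_CMPINT_EQ); /* only low 4 bits defined */
  return _mm512_mask_blend_epi32(m, e, f);              /* __mmask8 -> __mmask16 */
}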