[X86] Redefine avx512 packed fpclass intrinsics to return a vXi1 mask and implement the mask input argument using an 'and' IR instruction.

The frontend will surround the intrinsic with the appropriate marshalling to/from a scalar type to match the sigature of the builtin that software expects.

By exposing the vXi1 type directly in the llvm intrinsic we make it available to optimizers much earlier. This can enable the scalar marshalling code to be optimized away.

llvm-svn: 335563
This commit is contained in:
Craig Topper 2018-06-26 00:43:46 +00:00
parent 9b4322ce31
commit c2ee4a5035
6 changed files with 464 additions and 24 deletions

View File

@ -0,0 +1,161 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X64
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512dq-builtins.c
define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_mask_fpclass_pd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vfpclasspd $4, %zmm0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_fpclass_pd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vfpclasspd $4, %zmm0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: andb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
; CHECK-LABEL: test_mm512_fpclass_pd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4)
%1 = bitcast <8 x i1> %0 to i8
ret i8 %1
}
define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_fpclass_ps_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vfpclassps $4, %zmm0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_fpclass_ps_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vfpclassps $4, %zmm0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
%1 = bitcast i16 %__U to <16 x i1>
%2 = and <16 x i1> %0, %1
%3 = bitcast <16 x i1> %2 to i16
ret i16 %3
}
declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
; CHECK-LABEL: test_mm512_fpclass_ps_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassps $4, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4)
%1 = bitcast <16 x i1> %0 to i16
ret i16 %1
}
define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_sd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <4 x float> %__A to <2 x double>
%1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 -1)
ret i8 %1
}
declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)
define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_sd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_sd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
%0 = bitcast <4 x float> %__A to <2 x double>
%1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 %__U)
ret i8 %1
}
define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ss_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassss $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 -1)
ret i8 %0
}
declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)
define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_ss_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclassss $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_ss_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclassss $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
%0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 %__U)
ret i8 %0
}

View File

@ -536,3 +536,34 @@ define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
%res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res)
ret i8 %res1
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)
define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
%res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res)
ret i16 %res1
}

View File

@ -600,7 +600,7 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou
ret <2 x double> %res4
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)
declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
@ -611,11 +611,13 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res)
ret i8 %res1
%res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4)
%res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)
declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)
define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
@ -626,9 +628,11 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
%res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res)
ret i16 %res1
%res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4)
%res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2)
%1 = and <16 x i1> %res1, %res
%2 = bitcast <16 x i1> %1 to i16
ret i16 %2
}
declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

View File

@ -203,3 +203,174 @@ entry:
%1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
ret <4 x double> %1
}
define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_fpclass_pd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_pd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclasspd $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
%0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = and <2 x i1> %0, %extract
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
; CHECK-LABEL: test_mm_fpclass_pd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2)
%1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_pd_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_fpclass_pd_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = and <4 x i1> %0, %extract
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
; CHECK-LABEL: test_mm256_fpclass_pd_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2)
%1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_ps_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fpclass_ps_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vfpclassps $2, %xmm0, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
entry:
%0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = and <4 x i1> %0, %extract
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ps_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2)
%1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
; X86: # %bb.0: # %entry
; X86-NEXT: vfpclassps $2, %ymm0, %k0
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vfpclassps $2, %ymm0, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: andb %dil, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
; CHECK-LABEL: test_mm256_fpclass_ps_mask:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2)
%1 = bitcast <8 x i1> %0 to i8
ret i8 %1
}

View File

@ -2928,3 +2928,65 @@ define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}
declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
ret i8 %res1
}
declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
ret i8 %res1
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
ret i8 %res1
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
ret i8 %res1
}

View File

@ -734,7 +734,7 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f
ret <8 x float> %res2
}
declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)
define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
@ -744,12 +744,15 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res)
ret i8 %res1
%res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2)
%res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)
define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
@ -760,12 +763,14 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res)
ret i8 %res1
%res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2)
%res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4)
%1 = and <8 x i1> %res1, %res
%2 = bitcast <8 x i1> %1 to i8
ret i8 %2
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)
define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
@ -775,12 +780,15 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res)
ret i8 %res1
%res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4)
%res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2)
%1 = and <2 x i1> %res1, %res
%2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)
define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
@ -791,7 +799,10 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res)
ret i8 %res1
%res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2)
%res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4)
%1 = and <4 x i1> %res1, %res
%2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}