[X86] Add fast isel tests for some of the avx512 truncate intrinsics to match current clang codegen.

llvm-svn: 332326
This commit is contained in:
Craig Topper 2018-05-15 04:26:27 +00:00
parent 9f62b4c8a8
commit fadf8b8dec
4 changed files with 451 additions and 0 deletions

View File

@ -2169,5 +2169,190 @@ entry:
ret <8 x double> %1
}
; Unmasked _mm512_cvtepi32_epi8: a plain IR trunc <16 x i32> -> <16 x i8>
; should select to a single vpmovdb (plus vzeroupper on return).
define <2 x i64> @test_mm512_cvtepi32_epi8(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovdb %zmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovdb %zmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%conv.i = trunc <16 x i32> %0 to <16 x i8>
%1 = bitcast <16 x i8> %conv.i to <2 x i64>
ret <2 x i64> %1
}
; Masked _mm512_mask_cvtepi32_epi8: clang emits the
; llvm.x86.avx512.mask.pmov.db.512 intrinsic (not trunc+select), which
; should select to vpmovdb with a {%k1} merge-mask.
define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <2 x i64> %__O to <16 x i8>
%2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M)
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}
; Zero-masked _mm512_maskz_cvtepi32_epi8: mask.pmov.db.512 with a
; zeroinitializer passthru should select to vpmovdb {%k1} {z}.
define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M)
%2 = bitcast <16 x i8> %1 to <2 x i64>
ret <2 x i64> %2
}
; Unmasked _mm512_cvtepi64_epi32: trunc <8 x i64> -> <8 x i32> should
; select to vpmovqd (ymm result stays live, so no vzeroupper here).
define <4 x i64> @test_mm512_cvtepi64_epi32(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovqd %zmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovqd %zmm0, %ymm0
; X64-NEXT: retq
entry:
%conv.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast <8 x i32> %conv.i to <4 x i64>
ret <4 x i64> %0
}
; Masked _mm512_mask_cvtepi64_epi32: here clang emits trunc + select on an
; i8-bitcast <8 x i1> mask (no intrinsic); expects merge-masked vpmovqd.
define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%conv.i.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast <4 x i64> %__O to <8 x i32>
%1 = bitcast i8 %__M to <8 x i1>
%2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> %0
%3 = bitcast <8 x i32> %2 to <4 x i64>
ret <4 x i64> %3
}
; Zero-masked _mm512_maskz_cvtepi64_epi32: trunc + select-against-zero on
; an i8-bitcast <8 x i1> mask; expects vpmovqd {%k1} {z}.
define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%conv.i.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast i8 %__M to <8 x i1>
%1 = select <8 x i1> %0, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
; Unmasked _mm512_cvtepi64_epi16: trunc <8 x i64> -> <8 x i16> should
; select to a single vpmovqw (plus vzeroupper on return).
define <2 x i64> @test_mm512_cvtepi64_epi16(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovqw %zmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovqw %zmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv.i = trunc <8 x i64> %__A to <8 x i16>
%0 = bitcast <8 x i16> %conv.i to <2 x i64>
ret <2 x i64> %0
}
; Masked _mm512_mask_cvtepi64_epi16: uses the
; llvm.x86.avx512.mask.pmov.qw.512 intrinsic; expects merge-masked vpmovqw.
define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__O to <8 x i16>
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}
; Zero-masked _mm512_maskz_cvtepi64_epi16: mask.pmov.qw.512 with a
; zeroinitializer passthru; expects vpmovqw {%k1} {z}.
define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M)
%1 = bitcast <8 x i16> %0 to <2 x i64>
ret <2 x i64> %1
}
declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
!0 = !{i32 1}

View File

@ -801,5 +801,65 @@ entry:
ret i32 %4
}
; Unmasked _mm512_cvtepi16_epi8 (AVX512BW): trunc <32 x i16> -> <32 x i8>
; should select to vpmovwb (ymm result stays live, so no vzeroupper).
define <4 x i64> @test_mm512_cvtepi16_epi8(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovwb %zmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovwb %zmm0, %ymm0
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%conv.i = trunc <32 x i16> %0 to <32 x i8>
%1 = bitcast <32 x i8> %conv.i to <4 x i64>
ret <4 x i64> %1
}
; Masked _mm512_mask_cvtepi16_epi8: trunc + select on an i32-bitcast
; <32 x i1> mask; expects kmovd of the i32 mask and merge-masked vpmovwb.
define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpmovwb %zmm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%conv.i.i = trunc <32 x i16> %0 to <32 x i8>
%1 = bitcast <4 x i64> %__O to <32 x i8>
%2 = bitcast i32 %__M to <32 x i1>
%3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1
%4 = bitcast <32 x i8> %3 to <4 x i64>
ret <4 x i64> %4
}
; Zero-masked _mm512_maskz_cvtepi16_epi8: trunc + select-against-zero on an
; i32-bitcast <32 x i1> mask; expects vpmovwb {%k1} {z}.
define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%conv.i.i = trunc <32 x i16> %0 to <32 x i8>
%1 = bitcast i32 %__M to <32 x i1>
%2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer
%3 = bitcast <32 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
!0 = !{i32 1}

View File

@ -811,5 +811,79 @@ define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
ret <4 x i64> %res2
}
; Unmasked _mm256_cvtepi16_epi8 (AVX512BW+VL): trunc <16 x i16> -> <16 x i8>
; should select to vpmovwb on ymm (plus vzeroupper on return).
define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
; X32-LABEL: test_mm256_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovwb %ymm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovwb %ymm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <16 x i16>
%conv.i = trunc <16 x i16> %0 to <16 x i8>
%1 = bitcast <16 x i8> %conv.i to <2 x i64>
ret <2 x i64> %1
}
; Masked _mm256_mask_cvtepi16_epi8: the IR masks %__M with `and i16 ..., 255`
; before the i16 -> <16 x i1> bitcast (mirrors clang's codegen for this
; intrinsic), which currently shows up as explicit movzbl zero-extensions in
; the CHECK lines before the merge-masked vpmovwb.
define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_mask_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpmovwb %ymm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: kmovd %eax, %k1
; X64-NEXT: vpmovwb %ymm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv1.i = and i16 %__M, 255
%0 = bitcast <4 x i64> %__A to <16 x i16>
%conv.i.i = trunc <16 x i16> %0 to <16 x i8>
%1 = bitcast <2 x i64> %__O to <16 x i8>
%2 = bitcast i16 %conv1.i to <16 x i1>
%3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1
%4 = bitcast <16 x i8> %3 to <2 x i64>
ret <2 x i64> %4
}
; Zero-masked _mm256_maskz_cvtepi16_epi8: same `and i16 ..., 255` mask
; narrowing as the merge-masked variant, selecting against zero; expects
; vpmovwb {%k1} {z} after the movzbl/kmovd mask setup.
define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: kmovd %eax, %k1
; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv1.i = and i16 %__M, 255
%0 = bitcast <4 x i64> %__A to <16 x i16>
%conv.i.i = trunc <16 x i16> %0 to <16 x i8>
%1 = bitcast i16 %conv1.i to <16 x i1>
%2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}
!0 = !{i32 1}

View File

@ -3404,6 +3404,137 @@ entry:
ret <2 x i64> %tmp4
}
; Unmasked _mm256_cvtepi32_epi16 (AVX512VL): trunc <8 x i32> -> <8 x i16>
; should select to vpmovdw on ymm (plus vzeroupper on return).
define <2 x i64> @test_mm256_cvtepi32_epi16(<4 x i64> %__A) local_unnamed_addr #0 {
; X32-LABEL: test_mm256_cvtepi32_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovdw %ymm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_cvtepi32_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovdw %ymm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%conv.i = trunc <8 x i32> %0 to <8 x i16>
%1 = bitcast <8 x i16> %conv.i to <2 x i64>
ret <2 x i64> %1
}
; Masked _mm256_mask_cvtepi32_epi16: uses the
; llvm.x86.avx512.mask.pmov.dw.256 intrinsic; expects merge-masked vpmovdw.
define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_mask_cvtepi32_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovdw %ymm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvtepi32_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdw %ymm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <2 x i64> %__O to <8 x i16>
%2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M)
%3 = bitcast <8 x i16> %2 to <2 x i64>
ret <2 x i64> %3
}
; Zero-masked _mm256_maskz_cvtepi32_epi16: mask.pmov.dw.256 with a
; zeroinitializer passthru; expects vpmovdw {%k1} {z}.
define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_maskz_cvtepi32_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvtepi32_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}
; Unmasked _mm256_cvtepi64_epi32 (AVX512VL): trunc <4 x i64> -> <4 x i32>
; should select to vpmovqd on ymm (plus vzeroupper on return).
define <2 x i64> @test_mm256_cvtepi64_epi32(<4 x i64> %__A) local_unnamed_addr #0 {
; X32-LABEL: test_mm256_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovqd %ymm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovqd %ymm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv.i = trunc <4 x i64> %__A to <4 x i32>
%0 = bitcast <4 x i32> %conv.i to <2 x i64>
ret <2 x i64> %0
}
; Masked _mm256_mask_cvtepi64_epi32: trunc + select where the <4 x i1> mask
; comes from a shufflevector extracting the low 4 bits of the i8-bitcast
; <8 x i1> mask; expects merge-masked vpmovqd.
define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_mask_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv.i.i = trunc <4 x i64> %__A to <4 x i32>
%0 = bitcast <2 x i64> %__O to <4 x i32>
%1 = bitcast i8 %__M to <8 x i1>
%extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %0
%3 = bitcast <4 x i32> %2 to <2 x i64>
ret <2 x i64> %3
}
; Zero-masked _mm256_maskz_cvtepi64_epi32: same low-4-bit shufflevector mask
; extraction as the merge-masked variant, selecting against zero; expects
; vpmovqd {%k1} {z}.
define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) {
; X32-LABEL: test_mm256_maskz_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv.i.i = trunc <4 x i64> %__A to <4 x i32>
%0 = bitcast i8 %__M to <8 x i1>
%extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%1 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer
%2 = bitcast <4 x i32> %1 to <2 x i64>
ret <2 x i64> %2
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
@ -3426,5 +3557,6 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i
declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
!0 = !{i32 1}