[X86] Add expandload and compresstore fast-isel tests for avx512f and avx512vl. Update existing tests for avx512vbmi2 to use target independent intrinsics.
llvm-svn: 334368
commit 304bd747af
parent 15bee8f1c0
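The hunks below replace the x86-specific masked expand-load/compress-store intrinsics with the target-independent llvm.masked.* forms. A minimal sketch of that mapping, restating the 512-bit word variant that appears in the diff (the value names %old, %p, %m and %new are illustrative only, not taken from the tests):

; x86-specific form: opaque i8* pointer, integer mask operand
%old = tail call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %__P, <32 x i16> %0, i32 %__U)
; target-independent form: typed pointer plus an <N x i1> mask
%p = bitcast i8* %__P to i16*
%m = bitcast i32 %__U to <32 x i1>
%new = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %p, <32 x i1> %m, <32 x i16> %0)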
@@ -5666,10 +5666,276 @@ entry:
  ret <2 x double> %0
}

define <8 x i64> @test_mm512_mask_expandloadu_epi64(<8 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> %__W)
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> zeroinitializer)
  ret <8 x i64> %2
}

define <8 x double> @test_mm512_mask_expandloadu_pd(<8 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> %__W)
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> zeroinitializer)
  ret <8 x double> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi32(<8 x i64> %__W, i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__W to <16 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %1, <16 x i1> %2, <16 x i32> %0) #11
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi32(i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i32*
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %0, <16 x i1> %1, <16 x i32> zeroinitializer)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <16 x float> @test_mm512_mask_expandloadu_ps(<16 x float> %__W, i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> %__W) #11
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_expandloadu_ps(i16 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> zeroinitializer)
  ret <16 x float> %2
}

define void @test_mm512_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vcompresspd %zmm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8f64(<8 x double> %__A, double* %0, <8 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpcompressq %zmm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %__A, i64* %0, <8 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_ps(i8* %__P, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16f32(<16 x float> %__A, float* %0, <16 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi32(i8* %__P, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %0, i32* %1, <16 x i1> %2)
  ret void
}

declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) #9
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) #9
declare float @llvm.fma.f32(float, float, float) #9
declare double @llvm.fma.f64(double, double, double) #9
declare <8 x i64> @llvm.masked.expandload.v8i64(i64*, <8 x i1>, <8 x i64>)
declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
declare <16 x i32> @llvm.masked.expandload.v16i32(i32*, <16 x i1>, <16 x i32>) #10
declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)

!0 = !{i32 1}
@@ -103,7 +103,9 @@ define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64>
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  tail call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %__P, <32 x i16> %0, i32 %__U)
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

@@ -126,7 +128,8 @@ define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  tail call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %__P, <64 x i8> %0, i64 %__U)
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

@@ -227,9 +230,11 @@ define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %__P, <32 x i16> %0, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {

@@ -246,9 +251,11 @@ define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %__P, <32 x i16> zeroinitializer, i32 %__U)
  %1 = bitcast <32 x i16> %0 to <8 x i64>
  ret <8 x i64> %1
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {

@@ -268,9 +275,10 @@ define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8*
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %__P, <64 x i8> %0, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {

@@ -289,9 +297,10 @@ define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P)
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %__P, <64 x i8> zeroinitializer, i64 %__U)
  %1 = bitcast <64 x i8> %0 to <8 x i64>
  ret <8 x i64> %1
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {

@@ -932,12 +941,12 @@ entry:

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.x86.avx512.mask.compress.store.w.512(i8*, <32 x i16>, i32)
declare void @llvm.x86.avx512.mask.compress.store.b.512(i8*, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8*, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8*, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
@@ -100,7 +100,9 @@ define void @test_mm_mask_compressstoreu_epi16(i8* %__P, i8 zeroext %__U, <2 x i
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <8 x i16>
  tail call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %__P, <8 x i16> %0, i8 %__U)
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2)
  ret void
}

@@ -119,7 +121,8 @@ define void @test_mm_mask_compressstoreu_epi8(i8* %__P, i16 zeroext %__U, <2 x i
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__D to <16 x i8>
  tail call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %__P, <16 x i8> %0, i16 %__U)
  %1 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1)
  ret void
}

@@ -219,9 +222,11 @@ define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <8 x i16>
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %__P, <8 x i16> %0, i8 %__U)
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) {

@@ -239,9 +244,11 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly
; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %__P, <8 x i16> zeroinitializer, i8 %__U)
  %1 = bitcast <8 x i16> %0 to <2 x i64>
  ret <2 x i64> %1
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer)
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {

@@ -259,9 +266,10 @@ define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__S to <16 x i8>
  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %__P, <16 x i8> %0, i16 %__U)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0)
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) {

@@ -278,9 +286,10 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly
; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %__P, <16 x i8> zeroinitializer, i16 %__U)
  %1 = bitcast <16 x i8> %0 to <2 x i64>
  ret <2 x i64> %1
  %0 = bitcast i16 %__U to <16 x i1>
  %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {

@@ -378,7 +387,9 @@ define void @test_mm256_mask_compressstoreu_epi16(i8* %__P, i16 zeroext %__U, <4
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <16 x i16>
  tail call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %__P, <16 x i16> %0, i16 %__U)
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i16 %__U to <16 x i1>
  tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2)
  ret void
}

@@ -399,7 +410,8 @@ define void @test_mm256_mask_compressstoreu_epi8(i8* %__P, i32 %__U, <4 x i64> %
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__D to <32 x i8>
  tail call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %__P, <32 x i8> %0, i32 %__U)
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1)
  ret void
}

@@ -496,9 +508,11 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi16(<4 x i64> %__S, i16 zeroext
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <16 x i16>
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %__P, <16 x i16> %0, i16 %__U)
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) {

@@ -515,9 +529,11 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* reado
; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %__P, <16 x i16> zeroinitializer, i16 %__U)
  %1 = bitcast <16 x i16> %0 to <4 x i64>
  ret <4 x i64> %1
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer)
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) {

@@ -535,9 +551,10 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8*
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__S to <32 x i8>
  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %__P, <32 x i8> %0, i32 %__U)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0)
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) {

@@ -554,9 +571,10 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P)
; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %__P, <32 x i8> zeroinitializer, i32 %__U)
  %1 = bitcast <32 x i8> %0 to <4 x i64>
  ret <4 x i64> %1
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {

@@ -1857,20 +1875,20 @@ entry:

declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
declare void @llvm.x86.avx512.mask.compress.store.w.128(i8*, <8 x i16>, i8)
declare void @llvm.x86.avx512.mask.compress.store.b.128(i8*, <16 x i8>, i16)
declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8*, <8 x i16>, i8)
declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8*, <16 x i8>, i16)
declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
declare void @llvm.x86.avx512.mask.compress.store.w.256(i8*, <16 x i16>, i16)
declare void @llvm.x86.avx512.mask.compress.store.b.256(i8*, <32 x i8>, i32)
declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8*, <16 x i16>, i16)
declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8*, <32 x i8>, i32)
declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
@@ -5831,6 +5831,545 @@ entry:
  ret <8 x float> %2
}

define <2 x double> @test_mm_mask_expandloadu_pd(<2 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> %__W)
  ret <2 x double> %2
}

define <2 x double> @test_mm_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> zeroinitializer)
  ret <2 x double> %2
}

define <4 x double> @test_mm256_mask_expandloadu_pd(<4 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> %__W)
  ret <4 x double> %2
}

define <4 x double> @test_mm256_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandpd (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> zeroinitializer)
  ret <4 x double> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi64(<2 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> %__W) #10
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> zeroinitializer)
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_expandloadu_epi64(<4 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> %__W) #10
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandq (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> zeroinitializer)
  ret <4 x i64> %2
}

define <4 x float> @test_mm_mask_expandloadu_ps(<4 x float> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> %__W)
  ret <4 x float> %2
}

define <4 x float> @test_mm_maskz_expandloadu_ps(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> zeroinitializer)
  ret <4 x float> %2
}

define <8 x float> @test_mm256_mask_expandloadu_ps(<8 x float> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> %__W)
  ret <8 x float> %2
}

define <8 x float> @test_mm256_maskz_expandloadu_ps(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vexpandps (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> zeroinitializer)
  ret <8 x float> %2
}

define <2 x i64> @test_mm_mask_expandloadu_epi32(<2 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_mask_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__W to <4 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %1, <4 x i1> %extract.i, <4 x i32> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_maskz_expandloadu_epi32(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm_maskz_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i32*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %0, <4 x i1> %extract.i, <4 x i32> zeroinitializer)
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @test_mm256_mask_expandloadu_epi32(<4 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_mask_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__W to <8 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %1, <8 x i1> %2, <8 x i32> %0)
  %4 = bitcast <8 x i32> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_maskz_expandloadu_epi32(i8 zeroext %__U, i8* readonly %__P) {
; X86-LABEL: test_mm256_maskz_expandloadu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovw %ecx, %k1
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_expandloadu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpexpandd (%rsi), %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i32*
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %0, <8 x i1> %1, <8 x i32> zeroinitializer)
  %3 = bitcast <8 x i32> %2 to <4 x i64>
  ret <4 x i64> %3
}

define void @test_mm_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vcompresspd %xmm0, (%ecx) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  tail call void @llvm.masked.compressstore.v2f64(<2 x double> %__A, double* %0, <2 x i1> %extract.i)
  ret void
}

define void @test_mm256_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vcompresspd %ymm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to double*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.compressstore.v4f64(<4 x double> %__A, double* %0, <4 x i1> %extract.i)
  ret void
}

define void @test_mm_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpcompressq %xmm0, (%ecx) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %__A, i64* %0, <2 x i1> %extract.i)
  ret void
}

define void @test_mm256_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpcompressq %ymm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i64*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %__A, i64* %0, <4 x i1> %extract.i)
  ret void
}

define void @test_mm_mask_compressstoreu_ps(i8* %__P, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vcompressps %xmm0, (%ecx) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.compressstore.v4f32(<4 x float> %__A, float* %0, <4 x i1> %extract.i)
  ret void
}

define void @test_mm256_mask_compressstoreu_ps(i8* %__P, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_ps:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vcompressps %ymm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_ps:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to float*
  %1 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8f32(<8 x float> %__A, float* %0, <8 x i1> %1)
  ret void
}

define void @test_mm_mask_compressstoreu_epi32(i8* %__P, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpcompressd %xmm0, (%ecx) {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_compressstoreu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <4 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %0, i32* %1, <4 x i1> %extract.i)
  ret void
}

define void @test_mm256_mask_compressstoreu_epi32(i8* %__P, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpcompressd %ymm0, (%ecx) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_compressstoreu_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %esi, %k1
; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %1 = bitcast i8* %__P to i32*
  %2 = bitcast i8 %__U to <8 x i1>
  tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %0, i32* %1, <8 x i1> %2) #10
  ret void
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #8
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #8
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #8
@@ -5863,5 +6402,21 @@ declare <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float>, <4 x i32>, <
declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <2 x double> @llvm.masked.expandload.v2f64(double*, <2 x i1>, <2 x double>)
declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>)
declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)
declare <4 x i64> @llvm.masked.expandload.v4i64(i64*, <4 x i1>, <4 x i64>)
declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
declare <8 x float> @llvm.masked.expandload.v8f32(float*, <8 x i1>, <8 x float>)
declare <4 x i32> @llvm.masked.expandload.v4i32(i32*, <4 x i1>, <4 x i32>)
declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>)
declare void @llvm.masked.compressstore.v2f64(<2 x double>, double*, <2 x i1>)
declare void @llvm.masked.compressstore.v4f64(<4 x double>, double*, <4 x i1>)
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)

!0 = !{i32 1}