[AVX512] Add intrinsics for masked aligned FP loads and stores
Similar to the unaligned cases. Test was generated with update_llc_test_checks.py.

Part of <rdar://problem/17688758>

llvm-svn: 226296
commit 3e8b22bc1b
parent 9b8cfa212c
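The new intrinsics mirror the existing unaligned loadu/storeu variants; the only difference is the alignment contract on the pointer operand, which lets instruction selection pick the aligned vmovaps/vmovapd forms instead of vmovups/vmovupd. A minimal IR sketch contrasting the two load flavors (function and value names are illustrative, not part of the commit):

; Existing unaligned form: no alignment assumption on %p.
declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)
; New aligned form: %q is expected to be 64-byte aligned.
declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @example_loads(i8* %p, i8* %q, i16 %mask) {
  %a = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %p, <16 x float> zeroinitializer, i16 %mask)
  %b = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %q, <16 x float> zeroinitializer, i16 %mask)
  %r = fadd <16 x float> %a, %b
  ret <16 x float> %r
}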
@@ -1363,6 +1363,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">,
             Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
                       [IntrReadArgMem]>;
+  def int_x86_avx512_mask_load_ps_512 : GCCBuiltin<"__builtin_ia32_loadaps512_mask">,
+            Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
+                      [IntrReadArgMem]>;
+  def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
+            Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
+                      [IntrReadArgMem]>;
 }
 
 // Conditional store ops
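Like their unaligned counterparts, the two load intrinsics above take a pass-through vector as the second operand: lanes whose mask bit is clear keep the corresponding pass-through element (AVX-512 merge masking). The selection patterns added further down only match the zero-pass-through and all-ones-mask forms; the merge form sketched here is a semantic illustration only, not a case this commit's patterns cover:

define <8 x double> @example_merge_load(i8* %ptr, <8 x double> %old, i8 %mask) {
  ; Lanes with a clear bit in %mask keep the value from %old.
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %old, i8 %mask)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)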
@@ -1389,6 +1395,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
               GCCBuiltin<"__builtin_ia32_storeupd512_mask">,
               Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
                         [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_store_ps_512 :
+              GCCBuiltin<"__builtin_ia32_storeaps512_mask">,
+              Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
+                        [IntrReadWriteArgMem]>;
+  def int_x86_avx512_mask_store_pd_512 :
+              GCCBuiltin<"__builtin_ia32_storeapd512_mask">,
+              Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
+                        [IntrReadWriteArgMem]>;
   def int_x86_avx512_mask_store_ss :
               GCCBuiltin<"__builtin_ia32_storess_mask">,
               Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],
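The mask operands of the store intrinsics are plain i8/i16 scalars, so a vector predicate has to be packed into an integer before the call. A small illustrative sketch (not from the commit; assumes the usual <16 x i1>-to-i16 bitcast, where lane N maps to bit N):

define void @example_masked_store(i8* %ptr, <16 x float> %v, <16 x float> %bound) {
  ; Build a per-lane predicate and pack it into an i16 mask word.
  %cmp = fcmp olt <16 x float> %v, %bound
  %mask = bitcast <16 x i1> %cmp to i16
  ; Store only the selected lanes; %ptr is expected to be 64-byte aligned.
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %v, i16 %mask)
  ret void
}
declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16)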
@@ -2200,6 +2200,22 @@ def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
                   (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
        (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+
+def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
+                 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
+       (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+
+def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
+                  (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
+       (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+
+def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
+                 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
+       (VMOVAPDZrm addr:$ptr)>;
+
+def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
+                  (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
+       (VMOVAPSZrm addr:$ptr)>;
 
 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
                GR16:$mask),
        (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
@@ -2209,6 +2225,15 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
        (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                 VR512:$src)>;
+
+def: Pat<(int_x86_avx512_mask_store_ps_512 addr:$ptr, (v16f32 VR512:$src),
+               GR16:$mask),
+       (VMOVAPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
+                VR512:$src)>;
+def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
+               GR8:$mask),
+       (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
+                VR512:$src)>;
 
 def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
        (VMOVUPSZmrk addr:$ptr,
                     (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -551,7 +551,73 @@ define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
   ret void
 }
 
-declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
+declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
+
+define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
+
+define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_pd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovapd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
+
+define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_maskz_load_aligned_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
+  ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
+
+define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_maskz_load_aligned_pd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
+  ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
+
+define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_load_aligned_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovaps (%rdi), %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
+  ret <16 x float> %res
+}
+
+define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_load_aligned_pd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovapd (%rdi), %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
+  ret <8 x double> %res
+}
 
 define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
 ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]