[AVX512] Add intrinsics for masked aligned FP loads and stores

Similar to the unaligned cases.

Test was generated with update_llc_test_checks.py.

Part of <rdar://problem/17688758>

llvm-svn: 226296
This commit is contained in:
Adam Nemet 2015-01-16 18:50:09 +00:00
parent 9b8cfa212c
commit 3e8b22bc1b
3 changed files with 106 additions and 1 deletions

View File

@ -1363,6 +1363,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">, def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadArgMem]>; [IntrReadArgMem]>;
def int_x86_avx512_mask_load_ps_512 : GCCBuiltin<"__builtin_ia32_loadaps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrReadArgMem]>;
def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadArgMem]>;
} }
// Conditional store ops // Conditional store ops
@ -1389,6 +1395,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_storeupd512_mask">, GCCBuiltin<"__builtin_ia32_storeupd512_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadWriteArgMem]>; [IntrReadWriteArgMem]>;
def int_x86_avx512_mask_store_ps_512 :
GCCBuiltin<"__builtin_ia32_storeaps512_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_mask_store_pd_512 :
GCCBuiltin<"__builtin_ia32_storeapd512_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadWriteArgMem]>;
def int_x86_avx512_mask_store_ss : def int_x86_avx512_mask_store_ss :
GCCBuiltin<"__builtin_ia32_storess_mask">, GCCBuiltin<"__builtin_ia32_storess_mask">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],

View File

@ -2200,6 +2200,22 @@ def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
(bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)), (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
(VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
(bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
(VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
(bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
(VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
(VMOVAPDZrm addr:$ptr)>;
def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
(VMOVAPSZrm addr:$ptr)>;
def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src), def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
GR16:$mask), GR16:$mask),
(VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
@ -2209,6 +2225,15 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>; VR512:$src)>;
def: Pat<(int_x86_avx512_mask_store_ps_512 addr:$ptr, (v16f32 VR512:$src),
GR16:$mask),
(VMOVAPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
VR512:$src)>;
def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
GR8:$mask),
(VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
(VMOVUPSZmrk addr:$ptr, (VMOVUPSZmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),

View File

@ -551,7 +551,73 @@ define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
ret void ret void
} }
declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 ) declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_store_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_store_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_maskz_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_maskz_load_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
ret <16 x float> %res
}
define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_load_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
ret <8 x double> %res
}
define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) { define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1] ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]