[X86] Expand the testing of expand and compress intrinsics

The avx512f intrinsic tests were in the avx512vl file. We were also missing some combinations of masking: merge masking into a passthru, zero masking, and the unmasked (all-ones mask) case, as sketched below.
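
The three combinations follow this pattern (a sketch using the pd.512 compress intrinsic from the tests below; the %merge/%zero/%unmasked names are illustrative, not from the tests):

; merge masking: compress into an explicit passthru under %mask
%merge = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
; zero masking: zeroinitializer passthru
%zero = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
; unmasked: all-ones mask with undef passthru
%unmasked = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)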

This does show that we fail to use the zero-masking form of expand loads when the passthru is zero: we currently zero a register with vpxor and expand into it with merge masking instead. I'll try to get that fixed shortly.
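
A minimal sketch of that deficiency, distilled from test_maskz_expand_load_pd_512 below (the function name is hypothetical; the "preferred" line is an assumption about the intended codegen, not current output):

declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8*, <8 x double>, i8)
define <8 x double> @maskz_expand_load_sketch(i8* %addr, i8 %mask) {
  ; A zeroinitializer passthru should let isel pick the {z} form directly.
  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}
; current codegen:            vpxor %xmm0, %xmm0, %xmm0
;                             vexpandpd (%rdi), %zmm0 {%k1}
; preferred (zero masking):   vexpandpd (%rdi), %zmm0 {%k1} {z}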

llvm-svn: 333795
Craig Topper 2018-06-01 21:59:24 +00:00
parent d7e11ee342
commit c45479c08e
4 changed files with 1864 additions and 336 deletions


@@ -1,6 +1,475 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_mask_compress_store_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: test_compress_store_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}
define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; CHECK-LABEL: test_compress_store_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}
define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_compress_store_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; CHECK-LABEL: test_compress_store_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}
define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpcompressd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; CHECK-LABEL: test_compress_store_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_mask_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
ret <8 x double> %res
}
define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
ret <8 x double> %res
}
define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
; CHECK-LABEL: test_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}
define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
ret <16 x float> %res
}
define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
ret <16 x float> %res
}
define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; CHECK-LABEL: test_expand_load_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}
define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)
define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; CHECK-LABEL: test_expand_load_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}
define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandd (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)
define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; CHECK-LABEL: test_expand_load_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK: ## %bb.0:


@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vbmi2 | FileCheck %s
define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_expand_load_w_512:
define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_mask_expand_load_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw (%rdi), %zmm0 {%k1}
@@ -10,21 +10,62 @@ define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_compress_store_w_512:
define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; CHECK-LABEL: test_maskz_expand_load_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandw (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret void
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_expand_load_b_512:
declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_expand_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_maskz_expand_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; CHECK-LABEL: test_expand_load_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
ret <32 x i16> %res
}
define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_mask_expand_load_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: vpexpandb (%rdi), %zmm0 {%k1}
@@ -32,10 +73,114 @@ define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask)
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; CHECK-LABEL: test_maskz_expand_load_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandb (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_compress_store_b_512:
define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; CHECK-LABEL: test_mask_expand_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpexpandb %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_maskz_expand_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; CHECK-LABEL: test_expand_load_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
ret <64 x i8> %res
}
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_mask_compress_store_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_compress_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_maskz_compress_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)
define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; CHECK-LABEL: test_compress_store_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
ret void
}
define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_mask_compress_store_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: vpcompressb %zmm0, (%rdi) {%k1}
@@ -43,53 +188,48 @@ define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
define <32 x i16> @test_compress_w_512(<32 x i16> %data, <32 x i16> %src, i32 %mask) {
; CHECK-LABEL: test_compress_w_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
define <64 x i8> @test_compress_b_512(<64 x i8> %data, <64 x i8> %src, i64 %mask) {
; CHECK-LABEL: test_compress_b_512:
define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; CHECK-LABEL: test_mask_compress_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpcompressb %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src, i64 %mask)
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
define <32 x i16> @test_expand_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; CHECK-LABEL: test_expand_w_512:
define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_maskz_compress_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
define <64 x i8> @test_expand_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; CHECK-LABEL: test_expand_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
ret <64 x i8> %res
}
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)
define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; CHECK-LABEL: test_compress_store_b_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
ret void
}
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_512:


@@ -1,109 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s
define <16 x i16> @test_compress_w_256(<16 x i16> %src, <16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_compress_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
define <8 x i16> @test_compress_w_128(<8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_compress_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
define <32 x i8> @test_compress_b_256(<32 x i8> %src, <32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_compress_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src, i32 %mask)
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
define <16 x i8> @test_compress_b_128(<16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
define <32 x i8> @test_expand_b_256(<32 x i8> %data, <32 x i8> %src, i32 %mask) {
; CHECK-LABEL: test_expand_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256( <32 x i8> %data, <32 x i8> %src, i32 %mask)
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
define <16 x i8> @test_expand_b_128(<16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_expand_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
define <16 x i16> @test_expand_w_256(<16 x i16> %data, <16 x i16> %src, i16 %mask) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256( <16 x i16> %data, <16 x i16> %src, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
define <8 x i16> @test_expand_w_128(<8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_expand_load_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_expand_load_w_128:
define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_mask_expand_load_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw (%rdi), %xmm0 {%k1}
@@ -111,43 +10,62 @@ define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_load_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandw (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_compress_store_w_256:
define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret void
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
ret <8 x i16> %res
}
declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_compress_store_w_128:
define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_expand_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
ret void
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
ret <8 x i16> %res
}
declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_expand_load_b_256:
define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_expand_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret <32 x i8> %res
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_expand_load_b_128:
declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
; CHECK-LABEL: test_expand_load_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
ret <8 x i16> %res
}
define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandb (%rdi), %xmm0 {%k1}
@@ -155,21 +73,114 @@ define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask)
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_compress_store_b_256:
define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandb (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
ret <16 x i8> %res
}
define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
ret <16 x i8> %res
}
define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
; CHECK-LABEL: test_expand_load_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
ret <16 x i8> %res
}
define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_mask_compress_store_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %xmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_compress_store_b_128:
declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; CHECK-LABEL: test_mask_compress_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; CHECK-LABEL: test_maskz_compress_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
ret <8 x i16> %res
}
define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)
define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
; CHECK-LABEL: test_compress_store_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
ret void
}
define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressb %xmm0, (%rdi) {%k1}
@@ -177,8 +188,279 @@ define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
ret <16 x i8> %res
}
define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
ret <16 x i8> %res
}
define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)
define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
; CHECK-LABEL: test_compress_store_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %xmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
ret void
}
define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_mask_expand_load_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_load_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandw (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
ret <16 x i16> %res
}
define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_expand_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_expand_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; CHECK-LABEL: test_expand_load_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
ret <16 x i16> %res
}
define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_mask_expand_load_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret <32 x i8> %res
}
define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
; CHECK-LABEL: test_maskz_expand_load_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpexpandb (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
ret <32 x i8> %res
}
define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_expand_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
ret <32 x i8> %res
}
define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_maskz_expand_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
; CHECK-LABEL: test_expand_load_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups (%rdi), %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
ret <32 x i8> %res
}
define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_mask_compress_store_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressw %ymm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; CHECK-LABEL: test_mask_compress_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; CHECK-LABEL: test_maskz_compress_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
ret <16 x i16> %res
}
define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)
define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
; CHECK-LABEL: test_compress_store_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
ret void
}
define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_mask_compress_store_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vpcompressb %ymm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
ret void
}
declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_compress_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
ret <32 x i8> %res
}
define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; CHECK-LABEL: test_maskz_compress_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
ret <32 x i8> %res
}
define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)
define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
; CHECK-LABEL: test_compress_store_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovups %ymm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
ret void
}
define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; CHECK: ## %bb.0:

File diff suppressed because it is too large