forked from OSchip/llvm-project
[X86][AVX] Add AVX512DQ coverage for masked memory ops tests (PR34584)
llvm-svn: 359395
This commit is contained in:
parent
8651edf898
commit
fed302ae37
|
@ -4,7 +4,8 @@
|
|||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLDQ
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLBW
|
||||
|
||||
;
|
||||
; vXf64
|
||||
|
@ -266,6 +267,15 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v8f64_v8i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1
|
||||
; AVX512VLDQ-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v8f64_v8i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
|
@ -789,6 +799,33 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v16f64_v16i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k1
|
||||
; AVX512VLDQ-NEXT: kmovb %k1, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: shrl %ecx
|
||||
; AVX512VLDQ-NEXT: andl $-43, %ecx
|
||||
; AVX512VLDQ-NEXT: subl %ecx, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: andl $858993459, %ecx ## imm = 0x33333333
|
||||
; AVX512VLDQ-NEXT: shrl $2, %eax
|
||||
; AVX512VLDQ-NEXT: andl $858993459, %eax ## imm = 0x33333333
|
||||
; AVX512VLDQ-NEXT: addl %ecx, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: shrl $4, %ecx
|
||||
; AVX512VLDQ-NEXT: addl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F
|
||||
; AVX512VLDQ-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101
|
||||
; AVX512VLDQ-NEXT: shrl $24, %eax
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k1, %k2
|
||||
; AVX512VLDQ-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
|
||||
; AVX512VLDQ-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v16f64_v16i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $7, %xmm2, %xmm2
|
||||
|
@ -919,13 +956,13 @@ define void @compressstore_v2f32_v2i32(float* %base, <2 x float> %V, <2 x i32> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v2f32_v2i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512VLBW-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vcompressps %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: compressstore_v2f32_v2i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vcompressps %xmm0, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.compressstore.v2f32(<2 x float> %V, float* %base, <2 x i1> %mask)
|
||||
ret void
|
||||
|
@ -1041,6 +1078,13 @@ define void @compressstore_v4f32_v4i1(float* %base, <4 x float> %V, <4 x i1> %ma
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v4f32_v4i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm1, %k1
|
||||
; AVX512VLDQ-NEXT: vcompressps %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v4f32_v4i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
|
@ -1254,6 +1298,15 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v8f32_v8i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1
|
||||
; AVX512VLDQ-NEXT: vcompressps %ymm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v8f32_v8i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
|
@ -1347,6 +1400,14 @@ define void @compressstore_v16f32_const(float* %base, <16 x float> %V) {
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v16f32_const:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vcompressps %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v16f32_const:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movw $-2049, %ax ## imm = 0xF7FF
|
||||
|
@ -2730,6 +2791,13 @@ define void @compressstore_v2i64_v2i1(i64* %base, <2 x i64> %V, <2 x i1> %mask)
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v2i64_v2i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovq2m %xmm1, %k1
|
||||
; AVX512VLDQ-NEXT: vpcompressq %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v2i64_v2i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllq $63, %xmm1, %xmm1
|
||||
|
@ -2884,6 +2952,14 @@ define void @compressstore_v4i64_v4i1(i64* %base, <4 x i64> %V, <4 x i1> %mask)
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v4i64_v4i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm1, %k1
|
||||
; AVX512VLDQ-NEXT: vpcompressq %ymm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v4i64_v4i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
|
@ -3155,6 +3231,15 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v8i64_v8i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1
|
||||
; AVX512VLDQ-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v8i64_v8i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
|
@ -3290,11 +3375,11 @@ define void @compressstore_v4i32_v4i32(i32* %base, <4 x i32> %V, <4 x i32> %trig
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v4i32_v4i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmd %xmm1, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vpcompressd %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: compressstore_v4i32_v4i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmd %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vpcompressd %xmm0, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.compressstore.v4i32(<4 x i32> %V, i32* %base, <4 x i1> %mask)
|
||||
ret void
|
||||
|
@ -3597,6 +3682,89 @@ define void @compressstore_v8i16_v8i16(i16* %base, <8 x i16> %V, <8 x i16> %trig
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v8i16_v8i16:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrw $0, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrw $1, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store4
|
||||
; AVX512VLDQ-NEXT: vpextrw $2, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_6: ## %else5
|
||||
; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrw $3, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_8: ## %else8
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store10
|
||||
; AVX512VLDQ-NEXT: vpextrw $4, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_10: ## %else11
|
||||
; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrw $5, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_12: ## %else14
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store16
|
||||
; AVX512VLDQ-NEXT: vpextrw $6, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_14: ## %else17
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store19
|
||||
; AVX512VLDQ-NEXT: vpextrw $7, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB11_16: ## %else20
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v8i16_v8i16:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmw %xmm1, %xmm1, %k0
|
||||
|
@ -4249,6 +4417,169 @@ define void @compressstore_v16i8_v16i8(i8* %base, <16 x i8> %V, <16 x i8> %trigg
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: compressstore_v16i8_v16i8:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrb $0, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrb $1, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store4
|
||||
; AVX512VLDQ-NEXT: vpextrb $2, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_6: ## %else5
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrb $3, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_8: ## %else8
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store10
|
||||
; AVX512VLDQ-NEXT: vpextrb $4, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_10: ## %else11
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrb $5, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_12: ## %else14
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store16
|
||||
; AVX512VLDQ-NEXT: vpextrb $6, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_14: ## %else17
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store19
|
||||
; AVX512VLDQ-NEXT: vpextrb $7, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_16: ## %else20
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_18
|
||||
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store22
|
||||
; AVX512VLDQ-NEXT: vpextrb $8, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_18: ## %else23
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_20
|
||||
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store25
|
||||
; AVX512VLDQ-NEXT: vpextrb $9, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_20: ## %else26
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_22
|
||||
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store28
|
||||
; AVX512VLDQ-NEXT: vpextrb $10, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_22: ## %else29
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_24
|
||||
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store31
|
||||
; AVX512VLDQ-NEXT: vpextrb $11, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_24: ## %else32
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_26
|
||||
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store34
|
||||
; AVX512VLDQ-NEXT: vpextrb $12, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_26: ## %else35
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_28
|
||||
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store37
|
||||
; AVX512VLDQ-NEXT: vpextrb $13, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_28: ## %else38
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_30
|
||||
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store40
|
||||
; AVX512VLDQ-NEXT: vpextrb $14, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_30: ## %else41
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_32
|
||||
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store43
|
||||
; AVX512VLDQ-NEXT: vpextrb $15, %xmm0, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB12_32: ## %else44
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: compressstore_v16i8_v16i8:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmb %xmm1, %xmm1, %k0
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLDQ
|
||||
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLBW
|
||||
|
||||
;
|
||||
; vXf64
|
||||
|
@ -82,11 +83,11 @@ define <2 x double> @expandload_v2f64_v2i64(double* %base, <2 x double> %src0, <
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v2f64_v2i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vexpandpd (%rdi), %xmm0 {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: expandload_v2f64_v2i64:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vexpandpd (%rdi), %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
|
||||
%res = call <2 x double> @llvm.masked.expandload.v2f64(double* %base, <2 x i1> %mask, <2 x double> %src0)
|
||||
ret <2 x double>%res
|
||||
|
@ -263,11 +264,11 @@ define <4 x double> @expandload_v4f64_v4i64(double* %base, <4 x double> %src0, <
|
|||
; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v4f64_v4i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmq %ymm1, %ymm1, %k1
|
||||
; AVX512VLBW-NEXT: vexpandpd (%rdi), %ymm0 {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: expandload_v4f64_v4i64:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmq %ymm1, %ymm1, %k1
|
||||
; AVX512VL-NEXT: vexpandpd (%rdi), %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <4 x i64> %trigger, zeroinitializer
|
||||
%res = call <4 x double> @llvm.masked.expandload.v4f64(double* %base, <4 x i1> %mask, <4 x double> %src0)
|
||||
ret <4 x double>%res
|
||||
|
@ -471,6 +472,14 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8
|
|||
; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v8f64_v8i1:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1
|
||||
; AVX512VLDQ-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v8f64_v8i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpsllw $15, %xmm1, %xmm1
|
||||
|
@ -1180,6 +1189,31 @@ define <16 x double> @expandload_v16f64_v16i32(double* %base, <16 x double> %src
|
|||
; AVX512F-NEXT: vexpandpd (%rdi,%rax,8), %zmm1 {%k1}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v16f64_v16i32:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3
|
||||
; AVX512VLDQ-NEXT: vptestnmd %ymm3, %ymm3, %k1
|
||||
; AVX512VLDQ-NEXT: vptestnmd %ymm2, %ymm2, %k2
|
||||
; AVX512VLDQ-NEXT: kmovb %k2, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: shrl %ecx
|
||||
; AVX512VLDQ-NEXT: andl $-43, %ecx
|
||||
; AVX512VLDQ-NEXT: subl %ecx, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: andl $858993459, %ecx ## imm = 0x33333333
|
||||
; AVX512VLDQ-NEXT: shrl $2, %eax
|
||||
; AVX512VLDQ-NEXT: andl $858993459, %eax ## imm = 0x33333333
|
||||
; AVX512VLDQ-NEXT: addl %ecx, %eax
|
||||
; AVX512VLDQ-NEXT: movl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: shrl $4, %ecx
|
||||
; AVX512VLDQ-NEXT: addl %eax, %ecx
|
||||
; AVX512VLDQ-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F
|
||||
; AVX512VLDQ-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101
|
||||
; AVX512VLDQ-NEXT: shrl $24, %eax
|
||||
; AVX512VLDQ-NEXT: vexpandpd (%rdi,%rax,8), %zmm1 {%k1}
|
||||
; AVX512VLDQ-NEXT: vexpandpd (%rdi), %zmm0 {%k2}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v16f64_v16i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vextracti64x4 $1, %zmm2, %ymm3
|
||||
|
@ -1317,13 +1351,13 @@ define <2 x float> @expandload_v2f32_v2i1(float* %base, <2 x float> %src0, <2 x
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v2f32_v2i1:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512VLBW-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vexpandps (%rdi), %xmm0 {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: expandload_v2f32_v2i1:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vptestnmq %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vexpandps (%rdi), %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x float> @llvm.masked.expandload.v2f32(float* %base, <2 x i1> %mask, <2 x float> %src0)
|
||||
ret <2 x float> %res
|
||||
|
@ -1367,6 +1401,13 @@ define <4 x float> @expandload_v4f32_const(float* %base, <4 x float> %src0) {
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v4f32_const:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movb $7, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vexpandps (%rdi), %xmm0 {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v4f32_const:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movb $7, %al
|
||||
|
@ -1444,6 +1485,13 @@ define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
|
|||
; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v16f32_const:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movw $30719, %ax ## imm = 0x77FF
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vexpandps (%rdi), %zmm0 {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v16f32_const:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movw $30719, %ax ## imm = 0x77FF
|
||||
|
@ -1489,6 +1537,13 @@ define <16 x float> @expandload_v16f32_const_undef(float* %base) {
|
|||
; AVX512F-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v16f32_const_undef:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v16f32_const_undef:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movw $-2049, %ax ## imm = 0xF7FF
|
||||
|
@ -2954,6 +3009,13 @@ define <2 x i64> @expandload_v2i64_const(i64* %base, <2 x i64> %src0) {
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v2i64_const:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: movb $2, %al
|
||||
; AVX512VLDQ-NEXT: kmovw %eax, %k1
|
||||
; AVX512VLDQ-NEXT: vpexpandq (%rdi), %xmm0 {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v2i64_const:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: movb $2, %al
|
||||
|
@ -3094,11 +3156,11 @@ define <4 x i32> @expandload_v4i32_v4i32(i32* %base, <4 x i32> %src0, <4 x i32>
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v4i32_v4i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmd %xmm1, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vpexpandd (%rdi), %xmm0 {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: expandload_v4i32_v4i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmd %xmm1, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vpexpandd (%rdi), %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
%res = call <4 x i32> @llvm.masked.expandload.v4i32(i32* %base, <4 x i1> %mask, <4 x i32> %src0)
|
||||
ret <4 x i32>%res
|
||||
|
@ -3393,6 +3455,89 @@ define <8 x i16> @expandload_v8i16_v8i16(i16* %base, <8 x i16> %src0, <8 x i16>
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v8i16_v8i16:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.load
|
||||
; AVX512VLDQ-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.load1
|
||||
; AVX512VLDQ-NEXT: vpinsrw $1, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.load5
|
||||
; AVX512VLDQ-NEXT: vpinsrw $2, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_6: ## %else6
|
||||
; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.load9
|
||||
; AVX512VLDQ-NEXT: vpinsrw $3, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_8: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.load13
|
||||
; AVX512VLDQ-NEXT: vpinsrw $4, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_10: ## %else14
|
||||
; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.load17
|
||||
; AVX512VLDQ-NEXT: vpinsrw $5, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_12: ## %else18
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.load21
|
||||
; AVX512VLDQ-NEXT: vpinsrw $6, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: addq $2, %rdi
|
||||
; AVX512VLDQ-NEXT: LBB11_14: ## %else22
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB11_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.load25
|
||||
; AVX512VLDQ-NEXT: vpinsrw $7, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: LBB11_16: ## %else26
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v8i16_v8i16:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmw %xmm1, %xmm1, %k0
|
||||
|
@ -4120,6 +4265,169 @@ define <16 x i8> @expandload_v16i8_v16i8(i8* %base, <16 x i8> %src0, <16 x i8> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: expandload_v16i8_v16i8:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.load
|
||||
; AVX512VLDQ-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.load1
|
||||
; AVX512VLDQ-NEXT: vpinsrb $1, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.load5
|
||||
; AVX512VLDQ-NEXT: vpinsrb $2, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_6: ## %else6
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.load9
|
||||
; AVX512VLDQ-NEXT: vpinsrb $3, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_8: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.load13
|
||||
; AVX512VLDQ-NEXT: vpinsrb $4, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_10: ## %else14
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.load17
|
||||
; AVX512VLDQ-NEXT: vpinsrb $5, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_12: ## %else18
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.load21
|
||||
; AVX512VLDQ-NEXT: vpinsrb $6, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_14: ## %else22
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.load25
|
||||
; AVX512VLDQ-NEXT: vpinsrb $7, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_16: ## %else26
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_18
|
||||
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.load29
|
||||
; AVX512VLDQ-NEXT: vpinsrb $8, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_18: ## %else30
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_20
|
||||
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.load33
|
||||
; AVX512VLDQ-NEXT: vpinsrb $9, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_20: ## %else34
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_22
|
||||
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.load37
|
||||
; AVX512VLDQ-NEXT: vpinsrb $10, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_22: ## %else38
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_24
|
||||
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.load41
|
||||
; AVX512VLDQ-NEXT: vpinsrb $11, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_24: ## %else42
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_26
|
||||
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.load45
|
||||
; AVX512VLDQ-NEXT: vpinsrb $12, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_26: ## %else46
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_28
|
||||
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.load49
|
||||
; AVX512VLDQ-NEXT: vpinsrb $13, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_28: ## %else50
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm1, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_30
|
||||
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.load53
|
||||
; AVX512VLDQ-NEXT: vpinsrb $14, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: incq %rdi
|
||||
; AVX512VLDQ-NEXT: LBB12_30: ## %else54
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB12_32
|
||||
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.load57
|
||||
; AVX512VLDQ-NEXT: vpinsrb $15, (%rdi), %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: LBB12_32: ## %else58
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: expandload_v16i8_v16i8:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmb %xmm1, %xmm1, %k0
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,7 +4,8 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLBW
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLDQ
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL,AVX512VLBW
|
||||
|
||||
;
|
||||
; vXf64
|
||||
|
@ -94,6 +95,12 @@ define void @store_v2f64_v2i64(<2 x i64> %trigger, <2 x double>* %addr, <2 x dou
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v2f64_v2i64:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovq2m %xmm0, %k1
|
||||
; AVX512VLDQ-NEXT: vmovupd %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v2f64_v2i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -201,6 +208,13 @@ define void @store_v4f64_v4i64(<4 x i64> %trigger, <4 x double>* %addr, <4 x dou
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v4f64_v4i64:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovq2m %ymm0, %k1
|
||||
; AVX512VLDQ-NEXT: vmovupd %ymm1, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v4f64_v4i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -289,21 +303,18 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x floa
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v2f32_v2i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512VLBW-NEXT: vptestnmq %xmm0, %xmm0, %k1
|
||||
; AVX512VLBW-NEXT: vmovups %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: store_v2f32_v2i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vmovups %xmm1, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
|
||||
ret void
|
||||
}
|
||||
|
||||
; PR34584: The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
|
||||
; FIXME: The AVX512 code should be improved to use 'vpmovd2m'. Add tests for 512-bit vectors when implementing that.
|
||||
|
||||
define void @store_v4f32_v4i32(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
|
||||
; SSE2-LABEL: store_v4f32_v4i32:
|
||||
; SSE2: ## %bb.0:
|
||||
|
@ -391,6 +402,12 @@ define void @store_v4f32_v4i32(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v4f32_v4i32:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm2, %k1
|
||||
; AVX512VLDQ-NEXT: vmovups %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v4f32_v4i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
|
@ -555,6 +572,13 @@ define void @store_v8f32_v8i32(<8 x float> %x, <8 x float>* %ptr, <8 x float> %y
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v8f32_v8i32:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k1
|
||||
; AVX512VLDQ-NEXT: vmovups %ymm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v8f32_v8i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
|
@ -835,13 +859,28 @@ define void @store_v16f32_v16i32(<16 x float> %x, <16 x float>* %ptr, <16 x floa
|
|||
; AVX1OR2-NEXT: vzeroupper
|
||||
; AVX1OR2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: store_v16f32_v16i32:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpcmpgtd %zmm2, %zmm1, %k1
|
||||
; AVX512-NEXT: vmovups %zmm0, (%rdi) {%k1}
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512F-LABEL: store_v16f32_v16i32:
|
||||
; AVX512F: ## %bb.0:
|
||||
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm1, %k1
|
||||
; AVX512F-NEXT: vmovups %zmm0, (%rdi) {%k1}
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v16f32_v16i32:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k1
|
||||
; AVX512VLDQ-NEXT: vmovups %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v16f32_v16i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX512VLBW-NEXT: vpcmpgtd %zmm2, %zmm1, %k1
|
||||
; AVX512VLBW-NEXT: vmovups %zmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: vzeroupper
|
||||
; AVX512VLBW-NEXT: retq
|
||||
%bool_mask = icmp slt <16 x i32> %mask, zeroinitializer
|
||||
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %x, <16 x float>* %ptr, i32 1, <16 x i1> %bool_mask)
|
||||
ret void
|
||||
|
@ -918,6 +957,12 @@ define void @store_v2i64_v2i64(<2 x i64> %trigger, <2 x i64>* %addr, <2 x i64> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v2i64_v2i64:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovq2m %xmm0, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v2i64_v2i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -1033,6 +1078,13 @@ define void @store_v4i64_v4i64(<4 x i64> %trigger, <4 x i64>* %addr, <4 x i64> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v4i64_v4i64:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpmovq2m %ymm0, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqu64 %ymm1, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v4i64_v4i64:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
|
@ -1146,13 +1198,13 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v2i32_v2i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLBW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512VLBW-NEXT: vptestnmq %xmm0, %xmm0, %k1
|
||||
; AVX512VLBW-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: store_v2i32_v2i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
|
||||
ret void
|
||||
|
@ -1251,11 +1303,11 @@ define void @store_v4i32_v4i32(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v4i32_v4i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmd %xmm0, %xmm0, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: store_v4i32_v4i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
|
||||
ret void
|
||||
|
@ -1422,12 +1474,12 @@ define void @store_v8i32_v8i32(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v8i32_v8i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmd %ymm0, %ymm0, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: vzeroupper
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: store_v8i32_v8i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %val, <8 x i32>* %addr, i32 4, <8 x i1> %mask)
|
||||
ret void
|
||||
|
@ -1702,6 +1754,82 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, <8 x i16>* %addr, <8 x i16> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v8i16_v8i16:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrw $1, %xmm1, 2(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store3
|
||||
; AVX512VLDQ-NEXT: vpextrw $2, %xmm1, 4(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_6: ## %else4
|
||||
; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store5
|
||||
; AVX512VLDQ-NEXT: vpextrw $3, %xmm1, 6(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_8: ## %else6
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm2, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrw $4, %xmm1, 8(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_10: ## %else8
|
||||
; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store9
|
||||
; AVX512VLDQ-NEXT: vpextrw $5, %xmm1, 10(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_12: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store11
|
||||
; AVX512VLDQ-NEXT: vpextrw $6, %xmm1, 12(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_14: ## %else12
|
||||
; AVX512VLDQ-NEXT: kshiftrb $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB13_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrw $7, %xmm1, 14(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB13_16: ## %else14
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v8i16_v8i16:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmw %xmm0, %xmm0, %k1
|
||||
|
@ -2376,6 +2504,162 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v16i16_v16i16:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrw $0, %xmm1, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrw $1, %xmm1, 2(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store3
|
||||
; AVX512VLDQ-NEXT: vpextrw $2, %xmm1, 4(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_6: ## %else4
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store5
|
||||
; AVX512VLDQ-NEXT: vpextrw $3, %xmm1, 6(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_8: ## %else6
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrw $4, %xmm1, 8(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_10: ## %else8
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store9
|
||||
; AVX512VLDQ-NEXT: vpextrw $5, %xmm1, 10(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_12: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store11
|
||||
; AVX512VLDQ-NEXT: vpextrw $6, %xmm1, 12(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_14: ## %else12
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrw $7, %xmm1, 14(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_16: ## %else14
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_18
|
||||
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store15
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $0, %xmm2, 16(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_18: ## %else16
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_20
|
||||
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store17
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $1, %xmm2, 18(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_20: ## %else18
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_22
|
||||
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store19
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $2, %xmm2, 20(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_22: ## %else20
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_24
|
||||
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store21
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $3, %xmm2, 22(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_24: ## %else22
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_26
|
||||
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store23
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $4, %xmm2, 24(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_26: ## %else24
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_28
|
||||
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store25
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrw $5, %xmm2, 26(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_28: ## %else26
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_30
|
||||
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store27
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpextrw $6, %xmm0, 28(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_30: ## %else28
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB14_32
|
||||
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store29
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpextrw $7, %xmm0, 30(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB14_32: ## %else30
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v16i16_v16i16:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmw %ymm0, %ymm0, %k1
|
||||
|
@ -2908,6 +3192,154 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, <16 x i8>* %addr, <16 x i8> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v16i8_v16i8:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrb $0, %xmm1, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_2: ## %else
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrb $1, %xmm1, 1(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store3
|
||||
; AVX512VLDQ-NEXT: vpextrb $2, %xmm1, 2(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_6: ## %else4
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store5
|
||||
; AVX512VLDQ-NEXT: vpextrb $3, %xmm1, 3(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_8: ## %else6
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrb $4, %xmm1, 4(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_10: ## %else8
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store9
|
||||
; AVX512VLDQ-NEXT: vpextrb $5, %xmm1, 5(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_12: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store11
|
||||
; AVX512VLDQ-NEXT: vpextrb $6, %xmm1, 6(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_14: ## %else12
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrb $7, %xmm1, 7(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_16: ## %else14
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_18
|
||||
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store15
|
||||
; AVX512VLDQ-NEXT: vpextrb $8, %xmm1, 8(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_18: ## %else16
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_20
|
||||
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store17
|
||||
; AVX512VLDQ-NEXT: vpextrb $9, %xmm1, 9(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_20: ## %else18
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_22
|
||||
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store19
|
||||
; AVX512VLDQ-NEXT: vpextrb $10, %xmm1, 10(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_22: ## %else20
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_24
|
||||
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store21
|
||||
; AVX512VLDQ-NEXT: vpextrb $11, %xmm1, 11(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_24: ## %else22
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_26
|
||||
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store23
|
||||
; AVX512VLDQ-NEXT: vpextrb $12, %xmm1, 12(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_26: ## %else24
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_28
|
||||
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store25
|
||||
; AVX512VLDQ-NEXT: vpextrb $13, %xmm1, 13(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_28: ## %else26
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_30
|
||||
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store27
|
||||
; AVX512VLDQ-NEXT: vpextrb $14, %xmm1, 14(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_30: ## %else28
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB15_32
|
||||
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store29
|
||||
; AVX512VLDQ-NEXT: vpextrb $15, %xmm1, 15(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB15_32: ## %else30
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v16i8_v16i8:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmb %xmm0, %xmm0, %k1
|
||||
|
@ -4253,6 +4685,337 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: store_v32i8_v32i8:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_2
|
||||
; AVX512VLDQ-NEXT: ## %bb.1: ## %cond.store
|
||||
; AVX512VLDQ-NEXT: vpextrb $0, %xmm1, (%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_2: ## %else
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_4
|
||||
; AVX512VLDQ-NEXT: ## %bb.3: ## %cond.store1
|
||||
; AVX512VLDQ-NEXT: vpextrb $1, %xmm1, 1(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_4: ## %else2
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_6
|
||||
; AVX512VLDQ-NEXT: ## %bb.5: ## %cond.store3
|
||||
; AVX512VLDQ-NEXT: vpextrb $2, %xmm1, 2(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_6: ## %else4
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_8
|
||||
; AVX512VLDQ-NEXT: ## %bb.7: ## %cond.store5
|
||||
; AVX512VLDQ-NEXT: vpextrb $3, %xmm1, 3(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_8: ## %else6
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_10
|
||||
; AVX512VLDQ-NEXT: ## %bb.9: ## %cond.store7
|
||||
; AVX512VLDQ-NEXT: vpextrb $4, %xmm1, 4(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_10: ## %else8
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_12
|
||||
; AVX512VLDQ-NEXT: ## %bb.11: ## %cond.store9
|
||||
; AVX512VLDQ-NEXT: vpextrb $5, %xmm1, 5(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_12: ## %else10
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_14
|
||||
; AVX512VLDQ-NEXT: ## %bb.13: ## %cond.store11
|
||||
; AVX512VLDQ-NEXT: vpextrb $6, %xmm1, 6(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_14: ## %else12
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_16
|
||||
; AVX512VLDQ-NEXT: ## %bb.15: ## %cond.store13
|
||||
; AVX512VLDQ-NEXT: vpextrb $7, %xmm1, 7(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_16: ## %else14
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_18
|
||||
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store15
|
||||
; AVX512VLDQ-NEXT: vpextrb $8, %xmm1, 8(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_18: ## %else16
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_20
|
||||
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store17
|
||||
; AVX512VLDQ-NEXT: vpextrb $9, %xmm1, 9(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_20: ## %else18
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_22
|
||||
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store19
|
||||
; AVX512VLDQ-NEXT: vpextrb $10, %xmm1, 10(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_22: ## %else20
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_24
|
||||
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store21
|
||||
; AVX512VLDQ-NEXT: vpextrb $11, %xmm1, 11(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_24: ## %else22
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_26
|
||||
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store23
|
||||
; AVX512VLDQ-NEXT: vpextrb $12, %xmm1, 12(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_26: ## %else24
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_28
|
||||
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store25
|
||||
; AVX512VLDQ-NEXT: vpextrb $13, %xmm1, 13(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_28: ## %else26
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm3
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm3, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_30
|
||||
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store27
|
||||
; AVX512VLDQ-NEXT: vpextrb $14, %xmm1, 14(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_30: ## %else28
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_32
|
||||
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store29
|
||||
; AVX512VLDQ-NEXT: vpextrb $15, %xmm1, 15(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_32: ## %else30
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_34
|
||||
; AVX512VLDQ-NEXT: ## %bb.33: ## %cond.store31
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $0, %xmm2, 16(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_34: ## %else32
|
||||
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_36
|
||||
; AVX512VLDQ-NEXT: ## %bb.35: ## %cond.store33
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $1, %xmm2, 17(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_36: ## %else34
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $2, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_38
|
||||
; AVX512VLDQ-NEXT: ## %bb.37: ## %cond.store35
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $2, %xmm2, 18(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_38: ## %else36
|
||||
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_40
|
||||
; AVX512VLDQ-NEXT: ## %bb.39: ## %cond.store37
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $3, %xmm2, 19(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_40: ## %else38
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $4, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_42
|
||||
; AVX512VLDQ-NEXT: ## %bb.41: ## %cond.store39
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $4, %xmm2, 20(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_42: ## %else40
|
||||
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_44
|
||||
; AVX512VLDQ-NEXT: ## %bb.43: ## %cond.store41
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $5, %xmm2, 21(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_44: ## %else42
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $6, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_46
|
||||
; AVX512VLDQ-NEXT: ## %bb.45: ## %cond.store43
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $6, %xmm2, 22(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_46: ## %else44
|
||||
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_48
|
||||
; AVX512VLDQ-NEXT: ## %bb.47: ## %cond.store45
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $7, %xmm2, 23(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_48: ## %else46
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_50
|
||||
; AVX512VLDQ-NEXT: ## %bb.49: ## %cond.store47
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $8, %xmm2, 24(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_50: ## %else48
|
||||
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_52
|
||||
; AVX512VLDQ-NEXT: ## %bb.51: ## %cond.store49
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $9, %xmm2, 25(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_52: ## %else50
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $10, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_54
|
||||
; AVX512VLDQ-NEXT: ## %bb.53: ## %cond.store51
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $10, %xmm2, 26(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_54: ## %else52
|
||||
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_56
|
||||
; AVX512VLDQ-NEXT: ## %bb.55: ## %cond.store53
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $11, %xmm2, 27(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_56: ## %else54
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $12, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_58
|
||||
; AVX512VLDQ-NEXT: ## %bb.57: ## %cond.store55
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $12, %xmm2, 28(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_58: ## %else56
|
||||
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_60
|
||||
; AVX512VLDQ-NEXT: ## %bb.59: ## %cond.store57
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpextrb $13, %xmm2, 29(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_60: ## %else58
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %zmm0, %k0
|
||||
; AVX512VLDQ-NEXT: kshiftrw $14, %k0, %k1
|
||||
; AVX512VLDQ-NEXT: kmovw %k1, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_62
|
||||
; AVX512VLDQ-NEXT: ## %bb.61: ## %cond.store59
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpextrb $14, %xmm0, 30(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_62: ## %else60
|
||||
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
|
||||
; AVX512VLDQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512VLDQ-NEXT: testb $1, %al
|
||||
; AVX512VLDQ-NEXT: je LBB16_64
|
||||
; AVX512VLDQ-NEXT: ## %bb.63: ## %cond.store61
|
||||
; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
|
||||
; AVX512VLDQ-NEXT: vpextrb $15, %xmm0, 31(%rdi)
|
||||
; AVX512VLDQ-NEXT: LBB16_64: ## %else62
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: store_v32i8_v32i8:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestnmb %ymm0, %ymm0, %k1
|
||||
|
@ -4293,11 +5056,11 @@ define void @mstore_constmask_v4i32_v4i32(<4 x i32> %trigger, <4 x i32>* %addr,
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: mstore_constmask_v4i32_v4i32:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: kxnorw %k0, %k0, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: mstore_constmask_v4i32_v4i32:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: kxnorw %k0, %k0, %k1
|
||||
; AVX512VL-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
|
||||
ret void
|
||||
|
@ -4491,6 +5254,14 @@ define void @masked_store_bool_mask_demand_trunc_sext(<4 x double> %x, <4 x doub
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: masked_store_bool_mask_demand_trunc_sext:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm1, %k1
|
||||
; AVX512VLDQ-NEXT: vmovupd %ymm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: vzeroupper
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: masked_store_bool_mask_demand_trunc_sext:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
|
@ -4606,11 +5377,11 @@ define void @one_mask_bit_set1_variable(<4 x float>* %addr, <4 x float> %val, <4
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: one_mask_bit_set1_variable:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm1, %k1
|
||||
; AVX512VLBW-NEXT: vmovups %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX512VL-LABEL: one_mask_bit_set1_variable:
|
||||
; AVX512VL: ## %bb.0:
|
||||
; AVX512VL-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm1, %k1
|
||||
; AVX512VL-NEXT: vmovups %xmm0, (%rdi) {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
%mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
|
||||
%mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
|
||||
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 1, <4 x i1> %mask_bool)
|
||||
|
@ -4708,6 +5479,17 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
|
|||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: widen_masked_store:
|
||||
; AVX512VLDQ: ## %bb.0:
|
||||
; AVX512VLDQ-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm1, %k0
|
||||
; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm1
|
||||
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VLDQ-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
|
||||
; AVX512VLDQ-NEXT: vpmovd2m %xmm1, %k1
|
||||
; AVX512VLDQ-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1}
|
||||
; AVX512VLDQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: widen_masked_store:
|
||||
; AVX512VLBW: ## %bb.0:
|
||||
; AVX512VLBW-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
|
|
Loading…
Reference in New Issue