diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll
index fe1003e8b739..92b55eaab1d7 100644
--- a/llvm/test/CodeGen/X86/avx512-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-load-store.ll
@@ -1,12 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O2 -mattr=avx512f -mtriple=x86_64-unknown | FileCheck %s
+; RUN: llc < %s -O2 -mattr=avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
+; RUN: llc < %s -O2 -mattr=avx512f -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
 
 define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
-; CHECK-LABEL: test_mm_mask_move_ss:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_move_ss:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_move_ss:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: kmovw %eax, %k1
+; CHECK32-NEXT: vmovss %xmm2, %xmm0, %xmm0 {%k1}
+; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
   %tobool.i = icmp ne i8 %0, 0
@@ -18,11 +28,21 @@ entry:
 }
 
 define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
-; CHECK-LABEL: test_mm_maskz_move_ss:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_maskz_move_ss:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_maskz_move_ss:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: kmovw %eax, %k1
+; CHECK32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
   %tobool.i = icmp ne i8 %0, 0
@@ -33,11 +53,20 @@ entry:
 }
 
 define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
-; CHECK-LABEL: test_mm_mask_move_sd:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_move_sd:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_move_sd:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: kmovw %eax, %k1
+; CHECK32-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1}
+; CHECK32-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
   %tobool.i = icmp ne i8 %0, 0
@@ -49,11 +78,21 @@ entry:
 }
 
 define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
-; CHECK-LABEL: test_mm_maskz_move_sd:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_maskz_move_sd:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_maskz_move_sd:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: andl $1, %eax
+; CHECK32-NEXT: kmovw %eax, %k1
+; CHECK32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK32-NEXT: vmovsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; CHECK32-NEXT: retl
 entry:
   %0 = and i8 %__U, 1
   %tobool.i = icmp ne i8 %0, 0
@@ -64,11 +103,22 @@ entry:
 }
 
 define void @test_mm_mask_store_ss(float* %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #1 {
-; CHECK-LABEL: test_mm_mask_store_ss:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_store_ss:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %esi, %k1
+; CHECK64-NEXT: vmovss %xmm0, (%rdi) {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_store_ss:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: andl $1, %ecx
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovups %zmm0, (%eax) {%k1}
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %0 = bitcast float* %__W to <16 x float>*
   %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -80,11 +130,22 @@ entry:
 }
 
 define void @test_mm_mask_store_sd(double* %__W, i8 zeroext %__U, <2 x double> %__A) local_unnamed_addr #1 {
-; CHECK-LABEL: test_mm_mask_store_sd:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovsd %xmm0, (%rdi) {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_store_sd:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %esi, %k1
+; CHECK64-NEXT: vmovsd %xmm0, (%rdi) {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_store_sd:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: andb $1, %cl
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovupd %zmm0, (%eax) {%k1}
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %0 = bitcast double* %__W to <8 x double>*
   %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -95,11 +156,24 @@ entry:
 }
 
 define <4 x float> @test_mm_mask_load_ss(<4 x float> %__A, i8 zeroext %__U, float* %__W) local_unnamed_addr #2 {
-; CHECK-LABEL: test_mm_mask_load_ss:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovss (%rsi), %xmm0 {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_load_ss:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_load_ss:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; CHECK32-NEXT: andl $1, %ecx
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovups (%eax), %zmm0 {%k1}
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
   %0 = bitcast float* %__W to <16 x float>*
@@ -113,11 +187,23 @@ entry:
 }
 
 define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, double* %__W) local_unnamed_addr #2 {
-; CHECK-LABEL: test_mm_mask_load_sd:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovsd (%rsi), %xmm0 {%k1}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_mask_load_sd:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_mask_load_sd:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK32-NEXT: andb $1, %cl
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovupd (%eax), %zmm0 {%k1}
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %shuffle5.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
   %0 = bitcast double* %__W to <8 x double>*
@@ -130,11 +216,22 @@ entry:
 }
 
 define <4 x float> @test_mm_maskz_load_ss(i8 zeroext %__U, float* %__W) local_unnamed_addr #2 {
-; CHECK-LABEL: test_mm_maskz_load_ss:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovss (%rsi), %xmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_maskz_load_ss:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1} {z}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_maskz_load_ss:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT: andl $1, %ecx
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovups (%eax), %zmm0 {%k1} {z}
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %0 = bitcast float* %__W to <16 x float>*
   %1 = and i8 %__U, 1
@@ -146,11 +243,22 @@ entry:
 }
 
 define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, double* %__W) local_unnamed_addr #2 {
-; CHECK-LABEL: test_mm_maskz_load_sd:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovsd (%rsi), %xmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; CHECK64-LABEL: test_mm_maskz_load_sd:
+; CHECK64: # BB#0: # %entry
+; CHECK64-NEXT: kmovw %edi, %k1
+; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1} {z}
+; CHECK64-NEXT: retq
+;
+; CHECK32-LABEL: test_mm_maskz_load_sd:
+; CHECK32: # BB#0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: andb $1, %cl
+; CHECK32-NEXT: kmovw %ecx, %k1
+; CHECK32-NEXT: vmovupd (%eax), %zmm0 {%k1} {z}
+; CHECK32-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; CHECK32-NEXT: vzeroupper
+; CHECK32-NEXT: retl
 entry:
   %0 = bitcast double* %__W to <8 x double>*
   %1 = and i8 %__U, 1