Automatically generate AVX512 test cases. NFC
llvm-svn: 369264
parent 9d5e8a476f
commit 8130154115
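This change is an NFC regeneration of FileCheck assertions: fixed stack offsets such as 1134(%rbp) become regexes like {{[-0-9]+}}(%r{{[sb]}}p), and bare instruction checks gain {{.*#+}} operand annotations. A minimal sketch of the regeneration step, assuming a built llc and a placeholder test path (the touched file names are not shown in this extract):

  python llvm/utils/update_llc_test_checks.py --llc-binary=<path-to-built-llc> llvm/test/CodeGen/X86/<touched-test>.ll

The script runs each test's RUN line and rewrites the ; CHECK blocks from the actual llc output, which is why the hunks below pair an old check line with its regenerated replacement.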
@@ -11,12 +11,12 @@ define void @bar__512(<16 x i32>* %var) #0 {
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; CHECK-NEXT: vbroadcastss {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; CHECK-NEXT: vmovaps %zmm1, (%rdi)
; CHECK-NEXT: callq _Print__512
; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
; CHECK-NEXT: callq _Print__512
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm0
; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; CHECK-NEXT: vmovaps %zmm0, (%rbx)
; CHECK-NEXT: addq $112, %rsp
; CHECK-NEXT: popq %rbx

@@ -9,7 +9,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>
define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%x0 = load <4 x float>, <4 x float>* %x0ptr
@@ -20,7 +20,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load0(<4 x float>* %x0pt
define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmadd231ss {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%x1 = load <4 x float>, <4 x float>* %x1ptr
@@ -31,7 +31,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_load1(<4 x float> %x0, <
define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%x0 = load <2 x double>, <2 x double>* %x0ptr
@@ -42,7 +42,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load0(<2 x double>* %x0
define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmadd231sd {{.*#+}} xmm1 = (xmm0 * mem) + xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%x1 = load <2 x double>, <2 x double>* %x1ptr
@@ -53,7 +53,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd_load1(<2 x double> %x0,
define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(<4 x float>* %x0ptr, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%x0 = load <4 x float>, <4 x float>* %x0ptr
@@ -64,7 +64,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load0(<4 x float>* %x0pt
define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %x0, <4 x float>* %x1ptr, <4 x float> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231ss (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmsub231ss {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
%x1 = load <4 x float>, <4 x float>* %x1ptr
@@ -75,7 +75,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss_load1(<4 x float> %x0, <
define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(<2 x double>* %x0ptr, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load0:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%x0 = load <2 x double>, <2 x double>* %x0ptr
@@ -86,7 +86,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load0(<2 x double>* %x0
define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd_load1(<2 x double> %x0, <2 x double>* %x1ptr, <2 x double> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd_load1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vfmsub231sd {{.*#+}} xmm1 = (xmm0 * mem) - xmm1
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
%x1 = load <2 x double>, <2 x double>* %x1ptr

@@ -1,10 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

;CHECK-LABEL: test
;CHECK-NOT: dec
;CHECK-NOT: enc
;CHECK: ret
define i32 @test(i32 %a, i32 %b) {
; CHECK-LABEL: test:
; CHECK: ## %bb.0:
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal -1(%rdi), %eax
; CHECK-NEXT: addl $1, %esi
; CHECK-NEXT: imull %esi, %eax
; CHECK-NEXT: retq
%a1 = add i32 %a, -1
%b1 = add i32 %b, 1
%res = mul i32 %a1, %b1

@@ -94,12 +94,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $256, %esp ## imm = 0x100
; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill
; X32-NEXT: vmovaps %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 64-byte Spill
; X32-NEXT: vaddps %zmm1, %zmm0, %zmm0
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: calll _func_float16_ptr
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0 ## 64-byte Folded Reload
; X32-NEXT: vaddps {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 ## 64-byte Folded Reload
; X32-NEXT: vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp

@@ -186,52 +186,52 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; WIN64-KNL-NEXT: pushq %rbp
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-KNL-NEXT: kmovw %k7, 1134(%rbp) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k6, 1132(%rbp) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k5, 1130(%rbp) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k4, 1128(%rbp) # 2-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm21, 1024(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-KNL-NEXT: andq $-64, %rsp
; WIN64-KNL-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
; WIN64-KNL-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-KNL-NEXT: callq func_float16
; WIN64-KNL-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps 1024(%rbp), %zmm21 # 64-byte Reload
; WIN64-KNL-NEXT: kmovw 1128(%rbp), %k4 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw 1130(%rbp), %k5 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw 1132(%rbp), %k6 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw 1134(%rbp), %k7 # 2-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
; WIN64-KNL-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
; WIN64-KNL-NEXT: popq %rbp
; WIN64-KNL-NEXT: retq

@@ -241,52 +241,52 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; WIN64-SKX-NEXT: pushq %rbp
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-SKX-NEXT: kmovq %k7, 1128(%rbp) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k6, 1120(%rbp) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k5, 1112(%rbp) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k4, 1104(%rbp) # 8-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm21, 1024(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm20, 960(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm19, 896(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm18, 832(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm17, 768(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm16, 704(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm15, 640(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm14, 576(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm13, 512(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm12, 448(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm11, 384(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm10, 320(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm9, 256(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm8, 192(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm7, 128(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm6, 64(%rbp) # 64-byte Spill
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: vmovaps %zmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-SKX-NEXT: andq $-64, %rsp
; WIN64-SKX-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
; WIN64-SKX-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp)
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-SKX-NEXT: callq func_float16
; WIN64-SKX-NEXT: vmovaps 64(%rbp), %zmm6 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 128(%rbp), %zmm7 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 192(%rbp), %zmm8 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 256(%rbp), %zmm9 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 320(%rbp), %zmm10 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 384(%rbp), %zmm11 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 448(%rbp), %zmm12 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 512(%rbp), %zmm13 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 576(%rbp), %zmm14 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 640(%rbp), %zmm15 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 704(%rbp), %zmm16 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 768(%rbp), %zmm17 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 832(%rbp), %zmm18 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 896(%rbp), %zmm19 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 960(%rbp), %zmm20 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps 1024(%rbp), %zmm21 # 64-byte Reload
; WIN64-SKX-NEXT: kmovq 1104(%rbp), %k4 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq 1112(%rbp), %k5 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq 1120(%rbp), %k6 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq 1128(%rbp), %k7 # 8-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm6 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm7 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm8 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm9 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm10 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm11 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm12 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm13 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm14 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm15 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm16 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 # 64-byte Reload
; WIN64-SKX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 # 64-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
; WIN64-SKX-NEXT: popq %rbp
; WIN64-SKX-NEXT: retq

@@ -296,47 +296,47 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; X64-KNL-NEXT: pushq %rsi
; X64-KNL-NEXT: pushq %rdi
; X64-KNL-NEXT: subq $1064, %rsp ## imm = 0x428
; X64-KNL-NEXT: kmovw %k7, {{[0-9]+}}(%rsp) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k6, {{[0-9]+}}(%rsp) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k5, {{[0-9]+}}(%rsp) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k4, {{[0-9]+}}(%rsp) ## 2-byte Spill
; X64-KNL-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: kmovw %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; X64-KNL-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-KNL-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
; X64-KNL-NEXT: callq _func_float16
; X64-KNL-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k4 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k5 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k6 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k7 ## 2-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; X64-KNL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; X64-KNL-NEXT: addq $1064, %rsp ## imm = 0x428
; X64-KNL-NEXT: popq %rdi
; X64-KNL-NEXT: popq %rsi

@@ -347,47 +347,47 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; X64-SKX-NEXT: pushq %rsi
; X64-SKX-NEXT: pushq %rdi
; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
; X64-SKX-NEXT: kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
; X64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: kmovq %k4, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; X64-SKX-NEXT: vmovups %zmm31, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm30, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm29, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm28, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm27, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm26, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm25, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm24, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm23, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm22, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm21, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm20, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm19, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm18, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm17, {{[-0-9]+}}(%r{{[sb]}}p) ## 64-byte Spill
; X64-SKX-NEXT: vmovups %zmm16, (%rsp) ## 64-byte Spill
; X64-SKX-NEXT: callq _func_float16
; X64-SKX-NEXT: vmovups (%rsp), %zmm16 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm17 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm19 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm20 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm21 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm22 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm23 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm24 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm25 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm26 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm27 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm28 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm29 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm30 ## 64-byte Reload
; X64-SKX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm31 ## 64-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
; X64-SKX-NEXT: popq %rdi
; X64-SKX-NEXT: popq %rsi

@@ -10,9 +10,9 @@ define <4 x i1> @test_4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: callq _f
; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
@@ -32,10 +32,10 @@ define <8 x i1> @test_8i1(<8 x i32> %a, <8 x i32> %b) {
; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k1
; CHECK-NEXT: korb %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _f
; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
@@ -55,10 +55,10 @@ define <16 x i1> @test_16i1(<16 x i32> %a, <16 x i32> %b) {
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _f
; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; CHECK-NEXT: vpmovm2b %k0, %xmm0
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
@@ -77,10 +77,10 @@ define <32 x i1> @test_32i1(<32 x i16> %a, <32 x i16> %b) {
; CHECK-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k1
; CHECK-NEXT: kord %k1, %k0, %k0
; CHECK-NEXT: kmovd %k0, {{[0-9]+}}(%rsp) ## 4-byte Spill
; CHECK-NEXT: kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq _f
; CHECK-NEXT: kmovd {{[0-9]+}}(%rsp), %k0 ## 4-byte Reload
; CHECK-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 4-byte Reload
; CHECK-NEXT: vpmovm2b %k0, %ymm0
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq

@@ -58,7 +58,7 @@ define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %a, <2 x do
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
; CHECK-NEXT: retq
%c.val = load double, double* %c
%cv0 = insertelement <2 x double> undef, double %c.val, i32 0

@@ -1,33 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s

define i32 @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, <8 x i64> %E, <8 x i64> %EE, <16 x i32> %F, <16 x i32> %FF, <32 x i16> %G, <32 x i16> %GG, <64 x i8> %H, <64 x i8> %HH, i32 * %loadptr) {
; CHECK: vmovntps %z
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-64, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: vmovdqa64 144(%rbp), %zmm8
; CHECK-NEXT: vmovdqa64 16(%rbp), %zmm9
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vmovntps %zmm0, (%rdi)
; CHECK-NEXT: vpaddq %zmm5, %zmm4, %zmm0
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: vmovntpd %zmm0, (%rdi)
; CHECK-NEXT: vpaddd %zmm7, %zmm6, %zmm0
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: vpaddw 80(%rbp), %zmm9, %zmm0
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: vpaddb 208(%rbp), %zmm8, %zmm0
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: addl (%rsi), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%v0 = load i32, i32* %loadptr, align 1
%cast = bitcast i8* %B to <16 x float>*
%A2 = fadd <16 x float> %A, %AA
store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
%v1 = load i32, i32* %loadptr, align 1
; CHECK: vmovntdq %z
%cast1 = bitcast i8* %B to <8 x i64>*
%E2 = add <8 x i64> %E, %EE
store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
%v2 = load i32, i32* %loadptr, align 1
; CHECK: vmovntpd %z
%cast2 = bitcast i8* %B to <8 x double>*
%C2 = fadd <8 x double> %C, %CC
store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
%v3 = load i32, i32* %loadptr, align 1
; CHECK: vmovntdq %z
%cast3 = bitcast i8* %B to <16 x i32>*
%F2 = add <16 x i32> %F, %FF
store <16 x i32> %F2, <16 x i32>* %cast3, align 64, !nontemporal !0
%v4 = load i32, i32* %loadptr, align 1
; CHECK: vmovntdq %z
%cast4 = bitcast i8* %B to <32 x i16>*
%G2 = add <32 x i16> %G, %GG
store <32 x i16> %G2, <32 x i16>* %cast4, align 64, !nontemporal !0
%v5 = load i32, i32* %loadptr, align 1
; CHECK: vmovntdq %z
%cast5 = bitcast i8* %B to <64 x i8>*
%H2 = add <64 x i8> %H, %HH
store <64 x i8> %H2, <64 x i8>* %cast5, align 64, !nontemporal !0

@@ -8,7 +8,7 @@ define <4 x float>@test_var_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %
; CHECK-LABEL: test_var_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 %mask, i32 4)
ret < 4 x float> %res
@@ -18,7 +18,7 @@ define <4 x float>@test_var_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float>
; CHECK-LABEL: test_var_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 %mask, i32 4)
ret < 4 x float> %res
@@ -67,7 +67,7 @@ define <4 x float>@test_const2_maskz(<4 x float> %v0, <4 x float> %v1, <4 x floa
define <4 x float>@test_const_allone_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_allone_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 -1, i32 4)
ret < 4 x float> %res
@@ -76,7 +76,7 @@ define <4 x float>@test_const_allone_mask(<4 x float> %v0, <4 x float> %v1, <4 x
define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_allone_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 -1, i32 4)
ret < 4 x float> %res
@@ -85,7 +85,7 @@ define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4
define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_3_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4)
ret < 4 x float> %res
@@ -94,7 +94,7 @@ define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x floa
define <4 x float>@test_const_3_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: test_const_3_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2, i8 3, i32 4)
ret < 4 x float> %res

@@ -1,17 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s

define i32 @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE, i32* %loadptr) {
; CHECK: vmovntps %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
; CHECK-LABEL: f256:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdx), %eax ## encoding: [0x8b,0x02]
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; CHECK-NEXT: vmovntps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
; CHECK-NEXT: vpaddq %ymm5, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xd4,0xc5]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: vmovntdq %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: vmovntpd %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%v0 = load i32, i32* %loadptr, align 1
%cast = bitcast i8* %B to <8 x float>*
%A2 = fadd <8 x float> %A, %AA
store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
; CHECK: vmovntdq %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%v1 = load i32, i32* %loadptr, align 1
%cast1 = bitcast i8* %B to <4 x i64>*
%E2 = add <4 x i64> %E, %EE
store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
; CHECK: vmovntpd %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%v2 = load i32, i32* %loadptr, align 1
%cast2 = bitcast i8* %B to <4 x double>*
%C2 = fadd <4 x double> %C, %CC
@@ -24,17 +36,27 @@ define i32 @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x
}

define i32 @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE, i32* %loadptr) {
; CHECK-LABEL: f128:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdx), %eax ## encoding: [0x8b,0x02]
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; CHECK-NEXT: vmovntps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
; CHECK-NEXT: vpaddq %xmm5, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xd4,0xc5]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: vmovntdq %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x07]
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: vmovntpd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2b,0x07]
; CHECK-NEXT: addl (%rdx), %eax ## encoding: [0x03,0x02]
; CHECK-NEXT: retq ## encoding: [0xc3]
%v0 = load i32, i32* %loadptr, align 1
; CHECK: vmovntps %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast = bitcast i8* %B to <4 x float>*
%A2 = fadd <4 x float> %A, %AA
store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
; CHECK: vmovntdq %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%v1 = load i32, i32* %loadptr, align 1
%cast1 = bitcast i8* %B to <2 x i64>*
%E2 = add <2 x i64> %E, %EE
store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
; CHECK: vmovntpd %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%v2 = load i32, i32* %loadptr, align 1
%cast2 = bitcast i8* %B to <2 x double>*
%C2 = fadd <2 x double> %C, %CC
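Because the change is intended to be NFC, the regenerated tests should still pass as-is. A minimal verification sketch, assuming a CMake build directory named build and the same placeholder test path as above:

  build/bin/llvm-lit -v llvm/test/CodeGen/X86/<touched-test>.ll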