llvm-project/llvm/test/CodeGen/X86/avx512-regcall-Mask.ll

1010 lines
38 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
; Test regcall when receiving arguments of v64i1 type
; The callee takes thirteen <64 x i1> masks, bitcasts each one to i64 and
; returns their sum, so every incoming argument location shows up as one add
; (an addl/adcl pair on i386) in the expected output below.
; Per the expectations: i386 receives two masks in register pairs
; (%eax/%ecx and %edx/%edi) and eleven on the stack; Win64 receives twelve in
; general-purpose registers and one on the stack; Linux x86-64 receives eleven
; in general-purpose registers and two on the stack.
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32: # %bb.0:
; X32-NEXT: addl %edx, %eax
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64: # %bb.0:
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: addq %rdx, %rax
; WIN64-NEXT: addq %rdi, %rax
; WIN64-NEXT: addq %rsi, %rax
; WIN64-NEXT: addq %r8, %rax
; WIN64-NEXT: addq %r9, %rax
; WIN64-NEXT: addq %r10, %rax
; WIN64-NEXT: addq %r11, %rax
; WIN64-NEXT: addq %r12, %rax
; WIN64-NEXT: addq %r14, %rax
; WIN64-NEXT: addq %r15, %rax
; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: addq %rdx, %rax
; LINUXOSX64-NEXT: addq %rdi, %rax
; LINUXOSX64-NEXT: addq %rsi, %rax
; LINUXOSX64-NEXT: addq %r8, %rax
; LINUXOSX64-NEXT: addq %r9, %rax
; LINUXOSX64-NEXT: addq %r12, %rax
; LINUXOSX64-NEXT: addq %r13, %rax
; LINUXOSX64-NEXT: addq %r14, %rax
; LINUXOSX64-NEXT: addq %r15, %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: retq
; Reinterpret every mask argument as a 64-bit integer.
%y0 = bitcast <64 x i1> %x0 to i64
%y1 = bitcast <64 x i1> %x1 to i64
%y2 = bitcast <64 x i1> %x2 to i64
%y3 = bitcast <64 x i1> %x3 to i64
%y4 = bitcast <64 x i1> %x4 to i64
%y5 = bitcast <64 x i1> %x5 to i64
%y6 = bitcast <64 x i1> %x6 to i64
%y7 = bitcast <64 x i1> %x7 to i64
%y8 = bitcast <64 x i1> %x8 to i64
%y9 = bitcast <64 x i1> %x9 to i64
%y10 = bitcast <64 x i1> %x10 to i64
%y11 = bitcast <64 x i1> %x11 to i64
%y12 = bitcast <64 x i1> %x12 to i64
; Accumulate all thirteen values into a single result so each argument is used.
%add1 = add i64 %y0, %y1
%add2 = add i64 %add1, %y2
%add3 = add i64 %add2, %y3
%add4 = add i64 %add3, %y4
%add5 = add i64 %add4, %y5
%add6 = add i64 %add5, %y6
%add7 = add i64 %add6, %y7
%add8 = add i64 %add7, %y8
%add9 = add i64 %add8, %y9
%add10 = add i64 %add9, %y10
%add11 = add i64 %add10, %y11
%add12 = add i64 %add11, %y12
ret i64 %add12
}
; Test regcall when passing arguments of v64i1 type
; The caller is defined without an explicit calling convention and passes the
; same constant mask (i64 4294967298 = 0x100000002, bitcast to <64 x i1>) for
; all thirteen parameters of the regcall function test_argv64i1, then returns
; the callee's i64 result.
; The expectations show the constant being placed in each argument register
; (split into a $2 low half and a $1 high half on i386) and the remaining
; copies being stored or pushed to the stack.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X32-NEXT: subl $88, %esp
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT: vmovups %zmm0, (%esp)
; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $2, %edx
; X32-NEXT: movl $1, %edi
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv64i1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl $88, %esp
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r15
; WIN64-NEXT: .seh_pushreg %r15
; WIN64-NEXT: pushq %r14
; WIN64-NEXT: .seh_pushreg %r14
; WIN64-NEXT: pushq %r12
; WIN64-NEXT: .seh_pushreg %r12
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: .seh_stackalloc 48
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 32
; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT: movq %rax, (%rsp)
; WIN64-NEXT: movq %rax, %rcx
; WIN64-NEXT: movq %rax, %rdx
; WIN64-NEXT: movq %rax, %rdi
; WIN64-NEXT: movq %rax, %r8
; WIN64-NEXT: movq %rax, %r9
; WIN64-NEXT: movq %rax, %r10
; WIN64-NEXT: movq %rax, %r11
; WIN64-NEXT: movq %rax, %r12
; WIN64-NEXT: movq %rax, %r14
; WIN64-NEXT: movq %rax, %r15
; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: callq test_argv64i1
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $48, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: popq %r12
; WIN64-NEXT: popq %r14
; WIN64-NEXT: popq %r15
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: pushq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: pushq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: pushq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %r12, -40
; LINUXOSX64-NEXT: .cfi_offset %r13, -32
; LINUXOSX64-NEXT: .cfi_offset %r14, -24
; LINUXOSX64-NEXT: .cfi_offset %r15, -16
; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT: movq %rax, %rcx
; LINUXOSX64-NEXT: movq %rax, %rdx
; LINUXOSX64-NEXT: movq %rax, %rdi
; LINUXOSX64-NEXT: movq %rax, %r8
; LINUXOSX64-NEXT: movq %rax, %r9
; LINUXOSX64-NEXT: movq %rax, %r12
; LINUXOSX64-NEXT: movq %rax, %r13
; LINUXOSX64-NEXT: movq %rax, %r14
; LINUXOSX64-NEXT: movq %rax, %r15
; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: callq test_argv64i1
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT: popq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: popq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: popq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
; One constant mask value is reused for every one of the thirteen arguments.
%v0 = bitcast i64 4294967298 to <64 x i1>
%call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
<64 x i1> %v0)
ret i64 %call
}
; Test regcall when returning v64i1 type
; The regcall function returns the constant i64 4294967298 (0x100000002)
; bitcast to <64 x i1>. The expectations show the value being returned in
; general-purpose registers: split across %eax ($2) and %ecx ($1) on i386, and
; in %rax on both 64-bit targets.
define x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32: # %bb.0:
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT: retq
%a = bitcast i64 4294967298 to <64 x i1>
ret <64 x i1> %a
}
; Test regcall when processing result of v64i1 type
; The caller (no explicit calling convention) calls the regcall function
; test_retv64i1 and returns the resulting <64 x i1> unchanged.
; The expectations show the returned general-purpose register value being
; moved into a mask register (kmovq on the 64-bit targets; two kmovd joined by
; kunpckdq on i386) and then expanded into %zmm0 with vpmovm2b.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv64i1
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k0
; X32-NEXT: vpmovm2b %k0, %zmm0
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv64i1
; WIN64-NEXT: kmovq %rax, %k0
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv64i1
; LINUXOSX64-NEXT: kmovq %rax, %k0
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <64 x i1> @test_retv64i1()
ret <64 x i1> %call
}
; Test regcall when receiving arguments of v32i1 type
; test_argv32i1helper is an external function declared without an explicit
; calling convention. The regcall function below receives three <32 x i1>
; masks and forwards them unchanged to that helper, returning its i32 result.
; The expectations show the masks arriving in %eax, %ecx and %edx, being moved
; into mask registers with kmovd and expanded with vpmovm2b for the helper
; call. On Win64 the expanded vectors are stored to the stack and their
; addresses are passed in %rcx, %rdx and %r8; on the other two targets they
; stay in vector registers 0-2.
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT: calll _test_argv32i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: .seh_pushreg %rbp
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg %rsp
; WIN64-NEXT: subq $152, %rsp
; WIN64-NEXT: .seh_stackalloc 152
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: .seh_setframe %rbp, 128
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: andq $-32, %rsp
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k1, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv32i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: leaq 24(%rbp), %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT: callq test_argv32i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
; Forward the three masks to the helper in the same order they were received.
%res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
ret i32 %res
}
; Test regcall when passing arguments of v32i1 type
; The caller (no explicit calling convention) passes the constant i32 1,
; bitcast to <32 x i1>, as all three arguments of the regcall function
; test_argv32i1 and returns the callee's i32 result.
; The expectations show the constant being loaded into %eax, %ecx and %edx
; immediately before the call on every target.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv32i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv32i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv32i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
; The same constant mask is used for all three arguments.
%v0 = bitcast i32 1 to <32 x i1>
%call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
ret i32 %call
}
; Test regcall when returning v32i1 type
; The regcall function returns the constant i32 1 bitcast to <32 x i1>. The
; expectations show the value being returned in %eax on every target.
define x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32: # %bb.0:
; X32-NEXT: movl $1, %eax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl $1, %eax
; CHECK64-NEXT: retq
%a = bitcast i32 1 to <32 x i1>
ret <32 x i1> %a
}
; Test regcall when processing result of v32i1 type
; The caller (no explicit calling convention) calls the regcall function
; test_retv32i1, bitcasts the returned <32 x i1> to i32, adds 1 and returns
; the sum. The expectations show the increment being applied to %eax directly
; after the call, with no mask-register instructions in between.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv32i1
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv32i1
; WIN64-NEXT: incl %eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv32i1
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <32 x i1> @test_retv32i1()
; Use the returned mask as an integer so the result is consumed.
%c = bitcast <32 x i1> %call to i32
%add = add i32 %c, 1
ret i32 %add
}
; Test regcall when receiving arguments of v16i1 type
; test_argv16i1helper is an external function declared without an explicit
; calling convention. The regcall function below receives three <16 x i1>
; masks and forwards them unchanged to that helper, returning its i16 result.
; The expectations show the masks arriving in %eax, %ecx and %edx, being moved
; into mask registers with kmovd and expanded with vpmovm2b for the helper
; call. On Win64 the expanded vectors are stored to the stack and their
; addresses are passed in %rcx, %rdx and %r8; on the other two targets they
; stay in vector registers 0-2.
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv16i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg %rsp
; WIN64-NEXT: subq $80, %rsp
; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k1, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv16i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv16i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
; Forward the three masks to the helper in the same order they were received.
%res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
ret i16 %res
}
; Test regcall when passing arguments of v16i1 type
; The caller (no explicit calling convention) passes the constant i16 1,
; bitcast to <16 x i1>, as all three arguments of the regcall function
; test_argv16i1 and returns the callee's i16 result.
; The expectations show the constant being loaded into %eax, %ecx and %edx
; immediately before the call on every target.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv16i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv16i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv16i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
; The same constant mask is used for all three arguments.
%v0 = bitcast i16 1 to <16 x i1>
%call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
ret i16 %call
}
; Test regcall when returning v16i1 type
; The regcall function returns the constant i16 1 bitcast to <16 x i1>. The
; expectations show the value being returned in %ax on every target.
define x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32: # %bb.0:
; X32-NEXT: movw $1, %ax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movw $1, %ax
; CHECK64-NEXT: retq
%a = bitcast i16 1 to <16 x i1>
ret <16 x i1> %a
}
; Test regcall when processing result of v16i1 type
; The caller (no explicit calling convention) calls the regcall function
; test_retv16i1, bitcasts the returned <16 x i1> to i16, adds 1 and returns
; the sum. The expectations show the increment being done with incl on %eax
; directly after the call, with no mask-register instructions in between.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv16i1
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv16i1
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv16i1
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <16 x i1> @test_retv16i1()
; Use the returned mask as an integer so the result is consumed.
%c = bitcast <16 x i1> %call to i16
%add = add i16 %c, 1
ret i16 %add
}
; Test regcall when receiving arguments of v8i1 type
; test_argv8i1helper is an external function declared without an explicit
; calling convention. The regcall function below receives three <8 x i1>
; masks and forwards them unchanged to that helper, returning its i8 result.
; The expectations show the masks arriving in %eax, %ecx and %edx, being moved
; into mask registers with kmovd and expanded with vpmovm2w (word elements,
; unlike the vpmovm2b used by the wider-mask tests in this file) for the helper
; call. On Win64 the expanded vectors are stored to the stack and their
; addresses are passed in %rcx, %rdx and %r8; on the other two targets they
; stay in vector registers 0-2.
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2w %k2, %zmm0
; X32-NEXT: vpmovm2w %k1, %zmm1
; X32-NEXT: vpmovm2w %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv8i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg %r11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg %r10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg %rsp
; WIN64-NEXT: subq $80, %rsp
; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovd %ecx, %k2
; WIN64-NEXT: vpmovm2w %k2, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2w %k1, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv8i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv8i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
; Forward the three masks to the helper in the same order they were received.
%res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
ret i8 %res
}
; Test regcall when passing arguments of v8i1 type
; The caller (no explicit calling convention) passes the constant i8 1,
; bitcast to <8 x i1>, as all three arguments of the regcall function
; test_argv8i1 and returns the callee's i8 result.
; The expectations show the constant being loaded into %eax, %ecx and %edx
; immediately before the call on every target.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv8i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv8i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv8i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
; The same constant mask is used for all three arguments.
%v0 = bitcast i8 1 to <8 x i1>
%call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
ret i8 %call
}
; Test regcall when returning v8i1 type
; The regcall function returns the constant i8 1 bitcast to <8 x i1>. The
; expectations show the value being returned in %al on every target.
define x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32: # %bb.0:
; X32-NEXT: movb $1, %al
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
%a = bitcast i8 1 to <8 x i1>
ret <8 x i1> %a
}
; Test regcall when processing result of v8i1 type
; The caller (no explicit calling convention) calls the regcall function
; test_retv8i1 and returns the resulting <8 x i1> unchanged.
; The expectations show the value returned in %al being moved into a mask
; register with kmovd and expanded with vpmovm2w, with the result taken from
; %xmm0 and a vzeroupper emitted before returning.
; NOTE(review): attribute group #0 is not defined in the visible part of the
; file - confirm its contents against the end of the file.
define <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv8i1
; X32-NEXT: # kill: def $al killed $al def $eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: vpmovm2w %k0, %zmm0
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg %rsi
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg %rdi
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv8i1
; WIN64-NEXT: # kill: def $al killed $al def $eax
; WIN64-NEXT: kmovd %eax, %k0
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv8i1
; LINUXOSX64-NEXT: # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT: kmovd %eax, %k0
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
%call = call x86_regcallcc <8 x i1> @test_retv8i1()
ret <8 x i1> %call
}