; Source: llvm-project/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39437.
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs=0 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs=0 | FileCheck %s --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs=0 | FileCheck %s --check-prefix=LINUXOSX64
; Test regcall when receiving/returning i1.
; The function adds 1 to its i1 argument and returns the sum; the expected
; code below increments %al in place on all three targets.
define x86_regcallcc i1 @test_argReti1(i1 %a) {
; X32-LABEL: test_argReti1:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i1 %a, 1
  ret i1 %add
}
; Test regcall when passing/retrieving i1.
; Increments the argument, forwards it to @test_argReti1 through a regcall
; call, then increments the returned value before returning it.
define x86_regcallcc i1 @test_CallargReti1(i1 %a) {
; X32-LABEL: test_CallargReti1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti1
; X32-NEXT: incb %al
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti1
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti1
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i1 %a, 1
  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
  %d = add i1 %c, 1
  ret i1 %d
}
; Test regcall when receiving/returning i8.
; The function adds 1 to its i8 argument and returns the sum; the expected
; code below increments %al in place on all three targets.
define x86_regcallcc i8 @test_argReti8(i8 %a) {
; X32-LABEL: test_argReti8:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i8 %a, 1
  ret i8 %add
}
; Test regcall when passing/retrieving i8.
; Increments the argument, forwards it to @test_argReti8 through a regcall
; call, then increments the returned value before returning it.
define x86_regcallcc i8 @test_CallargReti8(i8 %a) {
; X32-LABEL: test_CallargReti8:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti8
; X32-NEXT: incb %al
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti8
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti8
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i8 %a, 1
  %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
  %d = add i8 %c, 1
  ret i8 %d
}
; Test regcall when receiving/returning i16.
; The function adds 1 to its i16 argument and returns the sum; the expected
; code below performs the increment on the full %eax register.
define x86_regcallcc i16 @test_argReti16(i16 %a) {
; X32-LABEL: test_argReti16:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i16 %a, 1
  ret i16 %add
}
; Test regcall when passing/retrieving i16.
; Increments the argument, forwards it to @test_argReti16 through a regcall
; call, then increments the returned value before returning it.
define x86_regcallcc i16 @test_CallargReti16(i16 %a) {
; X32-LABEL: test_CallargReti16:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti16
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti16
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti16
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i16 %a, 1
  %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
  %d = add i16 %c, 1
  ret i16 %d
}
; Test regcall when receiving/returning i32.
; The function adds 1 to its i32 argument and returns the sum; the expected
; code below is a single increment of %eax on all three targets.
define x86_regcallcc i32 @test_argReti32(i32 %a) {
; X32-LABEL: test_argReti32:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %add = add i32 %a, 1
  ret i32 %add
}
; Test regcall when passing/retrieving i32.
; Increments the argument, forwards it to @test_argReti32 through a regcall
; call, then increments the returned value before returning it.
define x86_regcallcc i32 @test_CallargReti32(i32 %a) {
; X32-LABEL: test_CallargReti32:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti32
; X32-NEXT: incl %eax
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti32
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti32
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i32 %a, 1
  %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
  %d = add i32 %c, 1
  ret i32 %d
}
; Test regcall when receiving/returning i64.
; Adds the constant 4294967299 (0x100000003) to the argument. The constant has
; non-zero bits in both 32-bit halves, so the 32-bit target needs an add/adc
; pair on %eax/%ecx while the 64-bit targets materialize it with movabsq.
define x86_regcallcc i64 @test_argReti64(i64 %a) {
; X32-LABEL: test_argReti64:
; X32: # %bb.0:
; X32-NEXT: addl $3, %eax
; X32-NEXT: adcl $1, %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: retq
  %add = add i64 %a, 4294967299
  ret i64 %add
}
; Test regcall when passing/retrieving i64.
; Increments the argument, forwards it to @test_argReti64 through a regcall
; call, then increments the returned value before returning it.
define x86_regcallcc i64 @test_CallargReti64(i64 %a) {
; X32-LABEL: test_CallargReti64:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: calll _test_argReti64
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incq %rax
; WIN64-NEXT: callq test_argReti64
; WIN64-NEXT: incq %rax
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: callq test_argReti64
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i64 %a, 1
  %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
  %d = add i64 %c, 1
  ret i64 %d
}
; Test regcall when receiving/returning float.
; Adds the constant 1.0 to the argument; the expected code below is a single
; vaddss on %xmm0 with the constant loaded from memory.
define x86_regcallcc float @test_argRetFloat(float %a) {
; X32-LABEL: test_argRetFloat:
; X32: # %bb.0:
; X32-NEXT: vaddss __real@3f800000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddss __real@{{.*}}(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd float 1.0, %a
  ret float %add
}
; Test regcall when passing/retrieving float.
; Adds 1.0 to the argument, forwards it to @test_argRetFloat through a regcall
; call, then adds 1.0 to the returned value. The expected code keeps the
; constant in a vector register across the call, so that register is spilled
; in the prologue and reloaded in the epilogue.
define x86_regcallcc float @test_CallargRetFloat(float %a) {
; X32-LABEL: test_CallargRetFloat:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $24, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetFloat
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $16, %rsp
; WIN64-NEXT: .seh_stackalloc 16
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; WIN64-NEXT: vaddss %xmm8, %xmm0, %xmm0
; WIN64-NEXT: callq test_argRetFloat
; WIN64-NEXT: vaddss %xmm8, %xmm0, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $16, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $16, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; LINUXOSX64-NEXT: vaddss %xmm8, %xmm0, %xmm0
; LINUXOSX64-NEXT: callq test_argRetFloat
; LINUXOSX64-NEXT: vaddss %xmm8, %xmm0, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $16, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd float 1.0, %a
  %c = call x86_regcallcc float @test_argRetFloat(float %b)
  %d = fadd float 1.0, %c
  ret float %d
}
; Test regcall when receiving/returning double.
; Adds the constant 1.0 to the argument; the expected code below is a single
; vaddsd on %xmm0 with the constant loaded from memory.
define x86_regcallcc double @test_argRetDouble(double %a) {
; X32-LABEL: test_argRetDouble:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@3ff0000000000000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd double %a, 1.0
  ret double %add
}
; Test regcall when passing/retrieving double.
; Adds 1.0 to the argument, forwards it to @test_argRetDouble through a
; regcall call, then adds 1.0 to the returned value. The expected code keeps
; the constant in a vector register across the call, so that register is
; spilled in the prologue and reloaded in the epilogue.
define x86_regcallcc double @test_CallargRetDouble(double %a) {
; X32-LABEL: test_CallargRetDouble:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $24, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetDouble
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $24, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $16, %rsp
; WIN64-NEXT: .seh_stackalloc 16
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; WIN64-NEXT: vaddsd %xmm8, %xmm0, %xmm0
; WIN64-NEXT: callq test_argRetDouble
; WIN64-NEXT: vaddsd %xmm8, %xmm0, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $16, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $16, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; LINUXOSX64-NEXT: vaddsd %xmm8, %xmm0, %xmm0
; LINUXOSX64-NEXT: callq test_argRetDouble
; LINUXOSX64-NEXT: vaddsd %xmm8, %xmm0, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $16, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd double 1.0, %a
  %c = call x86_regcallcc double @test_argRetDouble(double %b)
  %d = fadd double 1.0, %c
  ret double %d
}
; Test regcall when receiving/returning long double (x86_fp80).
; The function returns its argument added to itself; the expected code below
; is a single x87 fadd of %st(0) with itself on all three targets.
define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
; X32-LABEL: test_argRetf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st(0), %st(0)
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: fadd %st(0), %st(0)
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: fadd %st(0), %st(0)
; LINUXOSX64-NEXT: retq
%r0 = fadd x86_fp80 %a0, %a0
ret x86_fp80 %r0
}
; Test regcall when passing/retrieving long double (x86_fp80).
; Doubles the argument, forwards it to @test_argRetf80 through a regcall call,
; then doubles the returned value before returning it.
define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) {
; X32-LABEL: test_CallargRetf80:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: fadd %st(0), %st(0)
; X32-NEXT: calll _test_argRetf80
; X32-NEXT: fadd %st(0), %st(0)
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: fadd %st(0), %st(0)
; WIN64-NEXT: callq test_argRetf80
; WIN64-NEXT: fadd %st(0), %st(0)
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: fadd %st(0), %st(0)
; LINUXOSX64-NEXT: callq test_argRetf80
; LINUXOSX64-NEXT: fadd %st(0), %st(0)
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd x86_fp80 %a, %a
  %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
  %d = fadd x86_fp80 %c, %c
  ret x86_fp80 %d
}
; Test regcall when receiving/returning pointer.
; The pointer argument is converted to i32, incremented by one and converted
; back. The autogenerated assertions for all three runs expect this to be a
; single `incl %eax` followed by the return.
define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a) {
; X32-LABEL: test_argRetPointer:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  ret [4 x i32]* %d
}
; Test regcall when passing/retrieving pointer.
; The pointer is incremented (through an i32 round-trip) both before and after
; a regcall call to @test_argRetPointer, so the assertions expect `incl %eax`
; on each side of the call instruction.
define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) {
; X32-LABEL: test_CallargRetPointer:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argRetPointer
; X32-NEXT: incl %eax
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argRetPointer
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argRetPointer
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
  %f = ptrtoint [4 x i32]* %e to i32
  %g = add i32 %f, 1
  %h = inttoptr i32 %g to [4 x i32]*
  ret [4 x i32]* %h
}
; Test regcall when receiving/returning 128 bit vector.
; Returns a lane-wise select between %a and %b under the <4 x i1> mask %x.
; The assertions expect the mask to be built from %xmm0 (vpslld + vpmovd2m
; into %k1) and the select to become a masked vpblendmd on xmm registers.
define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: test_argRet128Vector:
; X32: # %bb.0:
; X32-NEXT: vpslld $31, %xmm0, %xmm0
; X32-NEXT: vpmovd2m %xmm0, %k1
; X32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: vpslld $31, %xmm0, %xmm0
; WIN64-NEXT: vpmovd2m %xmm0, %k1
; WIN64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm0
; LINUXOSX64-NEXT: vpmovd2m %xmm0, %k1
; LINUXOSX64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %d
}
; Test regcall when passing/retrieving 128 bit vector.
; Calls @test_argRet128Vector with %a passed as both vector operands, then
; selects between %a and the call result under the same mask %x. Because %a
; and the mask are needed after the call, the assertions expect %a to be kept
; in a register that is spilled/reloaded around the function (xmm4 in the
; 32-bit run, xmm8 in the 64-bit runs) and %k1 to be spilled across the call.
define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a) {
; X32-LABEL: test_CallargRet128Vector:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $40, %esp
; X32-NEXT: vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovdqa %xmm1, %xmm4
; X32-NEXT: vpslld $31, %xmm0, %xmm1
; X32-NEXT: vpmovd2m %xmm1, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %xmm4, %xmm1
; X32-NEXT: vmovdqa %xmm4, %xmm2
; X32-NEXT: calll _test_argRet128Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload
; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 8, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqa %xmm1, %xmm8
; WIN64-NEXT: vpslld $31, %xmm0, %xmm1
; WIN64-NEXT: vpmovd2m %xmm1, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %xmm8, %xmm1
; WIN64-NEXT: vmovdqa %xmm8, %xmm2
; WIN64-NEXT: callq test_argRet128Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $32, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovdqa %xmm1, %xmm8
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm1
; LINUXOSX64-NEXT: vpmovd2m %xmm1, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm2
; LINUXOSX64-NEXT: callq test_argRet128Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $32, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a)
  %c = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %c
}
; Test regcall when receiving/returning 256 bit vector.
; Returns a lane-wise select between %a and %b under the <8 x i1> mask %x.
; The assertions expect the mask to be moved from %eax into %k1 and the select
; to become a masked vpblendmd on ymm registers.
define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %b) {
; X32-LABEL: test_argRet256Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %d
}
; Test regcall when passing/retrieving 256 bit vector.
; Calls @test_argRet256Vector with %a passed as both vector operands, then
; selects between %a and the call result under the same mask %x. The
; assertions expect %ymm0 (%a) and the mask in %k1 to be spilled to the stack
; before the call and reloaded afterwards for the masked move.
define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a) {
; X32-LABEL: test_CallargRet256Vector:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $88, %esp
; X32-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %ymm0, %ymm1
; X32-NEXT: calll _test_argRet256Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload
; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; X32-NEXT: addl $88, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $80, %rsp
; WIN64-NEXT: .seh_stackalloc 80
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %ymm0, %ymm1
; WIN64-NEXT: callq test_argRet256Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; WIN64-NEXT: addq $80, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $80, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 96
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %ymm0, %ymm1
; LINUXOSX64-NEXT: callq test_argRet256Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: addq $80, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a)
  %c = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %c
}
; Test regcall when receiving/returning 512 bit vector.
; Returns a lane-wise select between %a and %b under the <16 x i1> mask %x.
; The assertions expect the mask to be moved from %eax into %k1 and the select
; to become a masked vpblendmd on zmm registers.
define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %b) {
; X32-LABEL: test_argRet512Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %d
}
; Test regcall when passing/retrieving 512 bit vector.
; Calls @test_argRet512Vector with %a passed as both vector operands, then
; selects between %a and the call result under the same mask %x. The
; assertions expect %zmm0 (%a) and the mask in %k1 to be spilled to the stack
; before the call and reloaded afterwards for the masked move.
define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a) {
; X32-LABEL: test_CallargRet512Vector:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $184, %esp
; X32-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa64 %zmm0, %zmm1
; X32-NEXT: calll _test_argRet512Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload
; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; X32-NEXT: addl $184, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $176, %rsp
; WIN64-NEXT: .seh_stackalloc 176
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa64 %zmm0, %zmm1
; WIN64-NEXT: callq test_argRet512Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; WIN64-NEXT: addq $176, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $176, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 192
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa64 %zmm0, %zmm1
; LINUXOSX64-NEXT: callq test_argRet512Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: addq $176, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a)
  %c = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %c
}
; Test regcall when running multiple input parameters - callee saved xmms.
; Computes ((%a + %b) - (%a * %b)) + %c on <32 x float> values; each operand is
; handled as a pair of zmm registers in the expected code. zmm6/zmm7 serve as
; temporaries in every run; only the 32-bit run's assertions expect xmm6 and
; xmm7 to be spilled on entry and reloaded before returning.
define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
; X32-LABEL: testf32_inp:
; X32: # %bb.0:
; X32-NEXT: subl $44, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, (%esp) # 16-byte Spill
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm6
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm7
; X32-NEXT: vmulps %zmm2, %zmm0, %zmm0
; X32-NEXT: vsubps %zmm0, %zmm6, %zmm0
; X32-NEXT: vmulps %zmm3, %zmm1, %zmm1
; X32-NEXT: vsubps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm4, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm5, %zmm1, %zmm1
; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $44, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; WIN64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; LINUXOSX64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; LINUXOSX64-NEXT: retq
  %x1 = fadd <32 x float> %a, %b
  %x2 = fmul <32 x float> %a, %b
  %x3 = fsub <32 x float> %x1, %x2
  %x4 = fadd <32 x float> %x3, %c
  ret <32 x float> %x4
}
; Test regcall when running multiple input parameters - callee saved GPRs.
; Takes twelve i32 arguments (%a1..%a6, %b1..%b6), pairs them up as
; (a1,a2) (a3,a4) (a5,a6) and (b1,b2) (b3,b4) (b5,b6), and returns
;   sum_i (a-diff_i * b-diff_i) + sum_i (a-sum_i * b-sum_i).
; The assertions expect the prologue/epilogue to save and restore ebp/ebx in
; the 32-bit run, r13/rbp/rbx in the Windows 64-bit run and rbp/rbx in the
; Linux 64-bit run; the 32-bit run reads several arguments from the stack and
; the Linux 64-bit run reads one.
define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
; X32-LABEL: testi32_inp:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: subl $20, %esp
; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: subl %ecx, %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: imull %ebp, %ebx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: subl %edi, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull %ebp, %eax
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: addl {{[0-9]+}}(%esp), %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %esi
; X32-NEXT: imull %eax, %esi
; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
; X32-NEXT: imull %ebp, %edx
; X32-NEXT: addl %esi, %edx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl %ebx, %eax
; X32-NEXT: addl $20, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testi32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r13
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: pushq %rbx
; WIN64-NEXT: movl %eax, %r13d
; WIN64-NEXT: subl %ecx, %eax
; WIN64-NEXT: movl %edx, %ebp
; WIN64-NEXT: subl %edi, %ebp
; WIN64-NEXT: movl %r9d, %ebx
; WIN64-NEXT: subl %r10d, %ebx
; WIN64-NEXT: imull %ebx, %eax
; WIN64-NEXT: movl %r11d, %ebx
; WIN64-NEXT: subl %r12d, %ebx
; WIN64-NEXT: imull %ebp, %ebx
; WIN64-NEXT: movl %esi, %ebp
; WIN64-NEXT: subl %r8d, %ebp
; WIN64-NEXT: addl %ebx, %eax
; WIN64-NEXT: movl %r14d, %ebx
; WIN64-NEXT: subl %r15d, %ebx
; WIN64-NEXT: imull %ebp, %ebx
; WIN64-NEXT: addl %ebx, %eax
; WIN64-NEXT: addl %ecx, %r13d
; WIN64-NEXT: addl %edi, %edx
; WIN64-NEXT: addl %r8d, %esi
; WIN64-NEXT: addl %r10d, %r9d
; WIN64-NEXT: imull %r13d, %r9d
; WIN64-NEXT: addl %r12d, %r11d
; WIN64-NEXT: imull %edx, %r11d
; WIN64-NEXT: addl %r9d, %r11d
; WIN64-NEXT: addl %r15d, %r14d
; WIN64-NEXT: imull %esi, %r14d
; WIN64-NEXT: addl %r11d, %r14d
; WIN64-NEXT: addl %r14d, %eax
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: popq %r13
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testi32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: pushq %rbx
; LINUXOSX64-NEXT: movl %eax, %r10d
; LINUXOSX64-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; LINUXOSX64-NEXT: subl %ecx, %eax
; LINUXOSX64-NEXT: movl %edx, %ebx
; LINUXOSX64-NEXT: subl %edi, %ebx
; LINUXOSX64-NEXT: movl %r9d, %ebp
; LINUXOSX64-NEXT: subl %r12d, %ebp
; LINUXOSX64-NEXT: imull %ebp, %eax
; LINUXOSX64-NEXT: movl %r13d, %ebp
; LINUXOSX64-NEXT: subl %r14d, %ebp
; LINUXOSX64-NEXT: imull %ebx, %ebp
; LINUXOSX64-NEXT: movl %esi, %ebx
; LINUXOSX64-NEXT: subl %r8d, %ebx
; LINUXOSX64-NEXT: addl %ebp, %eax
; LINUXOSX64-NEXT: movl %r15d, %ebp
; LINUXOSX64-NEXT: subl %r11d, %ebp
; LINUXOSX64-NEXT: imull %ebx, %ebp
; LINUXOSX64-NEXT: addl %ebp, %eax
; LINUXOSX64-NEXT: addl %ecx, %r10d
; LINUXOSX64-NEXT: addl %edi, %edx
; LINUXOSX64-NEXT: addl %r8d, %esi
; LINUXOSX64-NEXT: addl %r12d, %r9d
; LINUXOSX64-NEXT: imull %r10d, %r9d
; LINUXOSX64-NEXT: addl %r14d, %r13d
; LINUXOSX64-NEXT: imull %edx, %r13d
; LINUXOSX64-NEXT: addl %r9d, %r13d
; LINUXOSX64-NEXT: addl %r11d, %r15d
; LINUXOSX64-NEXT: imull %esi, %r15d
; LINUXOSX64-NEXT: addl %r13d, %r15d
; LINUXOSX64-NEXT: addl %r15d, %eax
; LINUXOSX64-NEXT: popq %rbx
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
  ; Pairwise differences of the a* and b* arguments.
  %x1 = sub i32 %a1, %a2
  %x2 = sub i32 %a3, %a4
  %x3 = sub i32 %a5, %a6
  %y1 = sub i32 %b1, %b2
  %y2 = sub i32 %b3, %b4
  %y3 = sub i32 %b5, %b6
  ; Pairwise sums of the same argument pairs.
  %v1 = add i32 %a1, %a2
  %v2 = add i32 %a3, %a4
  %v3 = add i32 %a5, %a6
  %w1 = add i32 %b1, %b2
  %w2 = add i32 %b3, %b4
  %w3 = add i32 %b5, %b6
  ; Products of matching differences and of matching sums.
  %s1 = mul i32 %x1, %y1
  %s2 = mul i32 %x2, %y2
  %s3 = mul i32 %x3, %y3
  %t1 = mul i32 %v1, %w1
  %t2 = mul i32 %v2, %w2
  %t3 = mul i32 %v3, %w3
  ; Accumulate both groups of products and add the two totals.
  %m1 = add i32 %s1, %s2
  %m2 = add i32 %m1, %s3
  %n1 = add i32 %t1, %t2
  %n2 = add i32 %n1, %t3
  %r1 = add i32 %m2, %n2
  ret i32 %r1
}
; Test that parameters, overflowing register capacity, are passed through the stack.
;
; Each <32 x float> argument is 1024 bits wide, i.e. two 512-bit zmm registers,
; so the nine arguments below need eighteen zmm-sized slots. The assertions
; show the i386 run keeping the first eight slots in zmm0-zmm7 and reading the
; remaining ten from memory off %ebp, while both x86-64 runs keep sixteen slots
; in zmm0-zmm15 and read only the last two from memory off %rbp.
; The function returns the element-wise sum of all nine arguments.
define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
; X32-LABEL: testf32_stack:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm1
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm0, %zmm4, %zmm0
; X32-NEXT: vaddps %zmm1, %zmm5, %zmm1
; X32-NEXT: vaddps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm0, %zmm6, %zmm0
; X32-NEXT: vaddps 8(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 72(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 200(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 136(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 264(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 328(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 456(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 392(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 520(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 584(%ebp), %zmm1, %zmm1
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_stack:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: andq $-64, %rsp
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; WIN64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; WIN64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; WIN64-NEXT: movq %rbp, %rsp
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_stack:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: movq %rsp, %rbp
; LINUXOSX64-NEXT: andq $-64, %rsp
; LINUXOSX64-NEXT: subq $64, %rsp
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; LINUXOSX64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; LINUXOSX64-NEXT: movq %rbp, %rsp
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
; Chain the additions so that every argument, including the stack-passed
; ones, contributes to the returned value.
%x1 = fadd <32 x float> %a0, %b0
%x2 = fadd <32 x float> %c0, %x1
%x3 = fadd <32 x float> %a1, %x2
%x4 = fadd <32 x float> %b1, %x3
%x5 = fadd <32 x float> %c1, %x4
%x6 = fadd <32 x float> %a2, %x5
%x7 = fadd <32 x float> %b2, %x6
%x8 = fadd <32 x float> %c2, %x7
ret <32 x float> %x8
}
; Test regcall when passing/retrieving mixed types.
;
; The arguments are unnamed, so they are numbered %0-%6 in declaration order:
;   %0 double, %1 float, %2 i8 signext, %3 i32, %4 i64, %5 i16 signext, %6 i32*
; Every non-double argument (and the i32 loaded through %6) is converted to
; double, the values are summed, and the sum is converted to i32 for the
; return value.
; Per the assertions, the i386 run receives the i64 as an %edx/%edi pair and
; fetches the pointer from the stack, whereas both x86-64 runs receive the i64
; in %rdx and the pointer in %rsi.
define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
; X32-LABEL: test_argRetMixTypes:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X32-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %edx, %xmm1
; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; X32-NEXT: vcvtqq2pd %ymm1, %ymm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl %esi, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvttsd2si %xmm0, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetMixTypes:
; WIN64: # %bb.0:
; WIN64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; WIN64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; WIN64-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdq %rdx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl %edi, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvttsd2si %xmm0, %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetMixTypes:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdq %rdx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl %edi, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax
; LINUXOSX64-NEXT: retq
; The unnamed entry block implicitly takes number %7, which is why the first
; instruction result is %8.
%8 = fpext float %1 to double
%9 = fadd double %8, %0
%10 = sitofp i8 %2 to double
%11 = fadd double %9, %10
%12 = sitofp i32 %3 to double
%13 = fadd double %11, %12
%14 = sitofp i64 %4 to double
%15 = fadd double %13, %14
%16 = sitofp i16 %5 to double
%17 = fadd double %15, %16
; %6 is the only argument that is dereferenced rather than used by value.
%18 = load i32, i32* %6, align 4
%19 = sitofp i32 %18 to double
%20 = fadd double %17, %19
%21 = fptosi double %20 to i32
ret i32 %21
}
; Aggregate returned by @test_argMultiRet: one float, one double and three
; integer members of different widths (i32, i8, i64).
%struct.complex = type { float, double, i32, i8, i64}
; Test regcall when returning a struct with several members of mixed types.
;
; The arguments are unnamed (%0 float, %1 double, %2 i32, %3 i8, %4 i64); only
; %0 and %1 are read. The returned struct carries %0 unchanged, %1 + 5.0, and
; the constants 4, 7 and 999 for the i32, i8 and i64 members.
; Per the assertions, the double is updated in place in %xmm1, the constants
; come back in %eax, %cl and %edx, and no instruction touches the float. The
; i386 run additionally clears %edi (presumably the upper half of the i64
; member -- confirm against the regcall ABI).
define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
; X32-LABEL: test_argMultiRet:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@4014000000000000, %xmm1, %xmm1
; X32-NEXT: movl $4, %eax
; X32-NEXT: movb $7, %cl
; X32-NEXT: movl $999, %edx # imm = 0x3E7
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: retl
;
; WIN64-LABEL: test_argMultiRet:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1
; WIN64-NEXT: movl $999, %edx # imm = 0x3E7
; WIN64-NEXT: movl $4, %eax
; WIN64-NEXT: movb $7, %cl
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argMultiRet:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1
; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7
; LINUXOSX64-NEXT: movl $4, %eax
; LINUXOSX64-NEXT: movb $7, %cl
; LINUXOSX64-NEXT: retq
; The unnamed entry block implicitly takes number %5, which is why the first
; instruction result is %6.
%6 = fadd double %1, 5.000000e+00
; Build the return value member by member, starting from undef.
%7 = insertvalue %struct.complex undef, float %0, 0
%8 = insertvalue %struct.complex %7, double %6, 1
%9 = insertvalue %struct.complex %8, i32 4, 2
%10 = insertvalue %struct.complex %9, i8 7, 3
%11 = insertvalue %struct.complex %10, i64 999, 4
ret %struct.complex %11
}