; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq -verify-machineinstrs | FileCheck %s --check-prefix=LINUXOSX64

; Origin: llvm-project/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
; Test regcall when receiving/returning i1.
; The function adds 1 to its i1 argument and returns the sum. For all three
; configurations the checks expect the value to arrive and leave in %al
; (a single `incb %al` followed by the return).
define dso_local x86_regcallcc i1 @test_argReti1(i1 %a) {
; X32-LABEL: test_argReti1:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i1 %a, 1
  ret i1 %add
}
; Test regcall when passing/retrieving i1.
; Adds 1 to the argument, passes it to @test_argReti1 using the regcall
; convention, and adds 1 to the returned value. The checks expect the outgoing
; argument to be zero-extended (`movzbl %al, %eax`) before the call. The two
; 64-bit configurations additionally push and pop one 8-byte slot around it.
define dso_local x86_regcallcc i1 @test_CallargReti1(i1 %a) {
; X32-LABEL: test_CallargReti1:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti1
; X32-NEXT: incb %al
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti1
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti1
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i1 %a, 1
  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
  %d = add i1 %c, 1
  ret i1 %d
}
; Test regcall when receiving/returning i8.
; The function adds 1 to its i8 argument and returns the sum. For all three
; configurations the checks expect the value to arrive and leave in %al
; (a single `incb %al` followed by the return).
define dso_local x86_regcallcc i8 @test_argReti8(i8 %a) {
; X32-LABEL: test_argReti8:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: incb %al
; WIN64-NEXT: # kill: def $al killed $al killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: # kill: def $al killed $al killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i8 %a, 1
  ret i8 %add
}
; Test regcall when passing/retrieving i8.
; Adds 1 to the argument, passes it to @test_argReti8 using the regcall
; convention, and adds 1 to the returned value. The checks expect the outgoing
; argument to be zero-extended (`movzbl %al, %eax`) before the call. The two
; 64-bit configurations additionally push and pop one 8-byte slot around it.
define dso_local x86_regcallcc i8 @test_CallargReti8(i8 %a) {
; X32-LABEL: test_CallargReti8:
; X32: # %bb.0:
; X32-NEXT: incb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: calll _test_argReti8
; X32-NEXT: incb %al
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti8:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incb %al
; WIN64-NEXT: movzbl %al, %eax
; WIN64-NEXT: callq test_argReti8
; WIN64-NEXT: incb %al
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti8:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: movzbl %al, %eax
; LINUXOSX64-NEXT: callq test_argReti8
; LINUXOSX64-NEXT: incb %al
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i8 %a, 1
  %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
  %d = add i8 %c, 1
  ret i8 %d
}
; Test regcall when receiving/returning i16.
; The function adds 1 to its i16 argument and returns the sum. For all three
; configurations the checks expect a 32-bit `incl %eax`, with the result then
; treated as living in $ax (see the kill annotation).
define dso_local x86_regcallcc i16 @test_argReti16(i16 %a) {
; X32-LABEL: test_argReti16:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: retq
  %add = add i16 %a, 1
  ret i16 %add
}
; Test regcall when passing/retrieving i16.
; Adds 1 to the argument, passes it to @test_argReti16 using the regcall
; convention, and adds 1 to the returned value. The checks expect both
; increments to be 32-bit `incl %eax`. The two 64-bit configurations
; additionally push and pop one 8-byte slot around the call.
define dso_local x86_regcallcc i16 @test_CallargReti16(i16 %a) {
; X32-LABEL: test_CallargReti16:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti16
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti16:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti16
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti16:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti16
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i16 %a, 1
  %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
  %d = add i16 %c, 1
  ret i16 %d
}
; Test regcall when receiving/returning i32.
; The function adds 1 to its i32 argument and returns the sum. For all three
; configurations the checks expect the value to arrive and leave in %eax
; (a single `incl %eax` followed by the return).
define dso_local x86_regcallcc i32 @test_argReti32(i32 %a) {
; X32-LABEL: test_argReti32:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %add = add i32 %a, 1
  ret i32 %add
}
; Test regcall when passing/retrieving i32.
; Adds 1 to the argument, passes it to @test_argReti32 using the regcall
; convention, and adds 1 to the returned value. The checks expect %eax to carry
; the value into and out of the call. The two 64-bit configurations
; additionally push and pop one 8-byte slot around it.
define dso_local x86_regcallcc i32 @test_CallargReti32(i32 %a) {
; X32-LABEL: test_CallargReti32:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argReti32
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti32:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argReti32
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti32:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argReti32
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i32 %a, 1
  %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
  %d = add i32 %c, 1
  ret i32 %d
}
; Test regcall when receiving/returning i64.
; The function adds the constant 4294967299 (0x100000003) to its i64 argument.
; For the i386 configuration the checks expect the value split across
; %eax (low half, `addl $3`) and %ecx (high half, `adcl $1`). For the two
; 64-bit configurations the constant is materialized with movabsq and added
; to %rax.
define dso_local x86_regcallcc i64 @test_argReti64(i64 %a) {
; X32-LABEL: test_argReti64:
; X32: # %bb.0:
; X32-NEXT: addl $3, %eax
; X32-NEXT: adcl $1, %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: movabsq $4294967299, %rcx # imm = 0x100000003
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: retq
  %add = add i64 %a, 4294967299
  ret i64 %add
}
; Test regcall when passing/retrieving i64.
; Adds 1 to the argument, passes it to @test_argReti64 using the regcall
; convention, and adds 1 to the returned value. For the i386 configuration the
; checks expect each increment as an `addl $1` / `adcl $0` pair on %eax/%ecx.
; For the two 64-bit configurations they expect `incq %rax` plus a push/pop of
; one 8-byte slot around the call.
define dso_local x86_regcallcc i64 @test_CallargReti64(i64 %a) {
; X32-LABEL: test_CallargReti64:
; X32: # %bb.0:
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: calll _test_argReti64
; X32-NEXT: addl $1, %eax
; X32-NEXT: adcl $0, %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargReti64:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incq %rax
; WIN64-NEXT: callq test_argReti64
; WIN64-NEXT: incq %rax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargReti64:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: callq test_argReti64
; LINUXOSX64-NEXT: incq %rax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = add i64 %a, 1
  %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
  %d = add i64 %c, 1
  ret i64 %d
}
; Test regcall when receiving/returning float.
; The function adds 1.0 to its float argument. For all three configurations
; the checks expect a single vaddss that reads the constant from memory and
; uses %xmm0 as both the incoming argument and the result. Only the way the
; constant is addressed differs between configurations.
define dso_local x86_regcallcc float @test_argRetFloat(float %a) {
; X32-LABEL: test_argRetFloat:
; X32: # %bb.0:
; X32-NEXT: vaddss __real@3f800000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddss __real@3f800000(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd float 1.0, %a
  ret float %add
}
; Test regcall when passing/retrieving float.
; Adds 1.0 to the argument, passes it to @test_argRetFloat using the regcall
; convention, and adds 1.0 to the returned value. The checks expect the 1.0
; constant to be loaded once into a register that is spilled in the prologue
; and reloaded in the epilogue (%xmm4 for the i386 configuration, %xmm8 for
; the two 64-bit configurations) and reused for both additions.
define dso_local x86_regcallcc float @test_CallargRetFloat(float %a) {
; X32-LABEL: test_CallargRetFloat:
; X32: # %bb.0:
; X32-NEXT: subl $28, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetFloat
; X32-NEXT: vaddss %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetFloat:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $24, %rsp
; WIN64-NEXT: .seh_stackalloc 24
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; WIN64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; WIN64-NEXT: callq test_argRetFloat
; WIN64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $24, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetFloat:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $24, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: callq test_argRetFloat
; LINUXOSX64-NEXT: vaddss %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd float 1.0, %a
  %c = call x86_regcallcc float @test_argRetFloat(float %b)
  %d = fadd float 1.0, %c
  ret float %d
}
; Test regcall when receiving/returning double.
; The function adds 1.0 to its double argument. For all three configurations
; the checks expect a single vaddsd that reads the constant from memory and
; uses %xmm0 as both the incoming argument and the result. Only the way the
; constant is addressed differs between configurations.
define dso_local x86_regcallcc double @test_argRetDouble(double %a) {
; X32-LABEL: test_argRetDouble:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@3ff0000000000000, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@3ff0000000000000(%rip), %xmm0, %xmm0
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; LINUXOSX64-NEXT: retq
  %add = fadd double %a, 1.0
  ret double %add
}
; Test regcall when passing/retrieving double.
; Adds 1.0 to the argument, passes it to @test_argRetDouble using the regcall
; convention, and adds 1.0 to the returned value. The checks expect the 1.0
; constant to be loaded once into a register that is spilled in the prologue
; and reloaded in the epilogue (%xmm4 for the i386 configuration, %xmm8 for
; the two 64-bit configurations) and reused for both additions.
define dso_local x86_regcallcc double @test_CallargRetDouble(double %a) {
; X32-LABEL: test_CallargRetDouble:
; X32: # %bb.0:
; X32-NEXT: subl $28, %esp
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: calll _test_argRetDouble
; X32-NEXT: vaddsd %xmm4, %xmm0, %xmm0
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetDouble:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $24, %rsp
; WIN64-NEXT: .seh_stackalloc 24
; WIN64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; WIN64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; WIN64-NEXT: callq test_argRetDouble
; WIN64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $24, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetDouble:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $24, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: callq test_argRetDouble
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm8, %xmm0
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd double 1.0, %a
  %c = call x86_regcallcc double @test_argRetDouble(double %b)
  %d = fadd double 1.0, %c
  ret double %d
}
; Long double (x86_fp80) received and returned through regcall.
; The argument is added to itself. Every configuration is expected to emit a
; single `fadd %st, %st(0)` before returning.
define dso_local x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
; X32-LABEL: test_argRetf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: retq
  %doubled = fadd x86_fp80 %a0, %a0
  ret x86_fp80 %doubled
}
; Long double (x86_fp80) received through regcall, double returned.
; The argument is truncated to double with fptrunc. Every configuration is
; expected to store the value to a stack slot with fstpl and reload it into
; %xmm0 with vmovsd. Only the way the stack slot is obtained differs.
define dso_local x86_regcallcc double @test_argParamf80(x86_fp80 %a0) nounwind {
; X32-LABEL: test_argParamf80:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: fstpl (%esp)
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argParamf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: fstpl (%rsp)
; WIN64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argParamf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: fstpl -{{[0-9]+}}(%rsp)
; LINUXOSX64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; LINUXOSX64-NEXT: retq
  %narrowed = fptrunc x86_fp80 %a0 to double
  ret double %narrowed
}
; Test regcall when passing/retrieving long double.
; Adds the x86_fp80 argument to itself, passes it to @test_argRetf80 using the
; regcall convention, and adds the returned value to itself. The checks expect
; one `fadd %st, %st(0)` on each side of the call. The two 64-bit
; configurations additionally push and pop one 8-byte slot around it.
define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) {
; X32-LABEL: test_CallargRetf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: calll _test_argRetf80
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: callq test_argRetf80
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: callq test_argRetf80
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd x86_fp80 %a, %a
  %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
  %d = fadd x86_fp80 %c, %c
  ret x86_fp80 %d
}
; Test regcall when passing long double and retrieving double.
; Adds the x86_fp80 argument to itself, passes it to @test_argParamf80 using
; the regcall convention, and adds the returned double to itself. The checks
; expect an x87 `fadd %st, %st(0)` before the call and a
; `vaddsd %xmm0, %xmm0, %xmm0` on the returned value after it.
define dso_local x86_regcallcc double @test_CallargParamf80(x86_fp80 %a) {
; X32-LABEL: test_CallargParamf80:
; X32: # %bb.0:
; X32-NEXT: fadd %st, %st(0)
; X32-NEXT: calll _test_argParamf80
; X32-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargParamf80:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: fadd %st, %st(0)
; WIN64-NEXT: callq test_argParamf80
; WIN64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; WIN64-NEXT: popq %rax
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargParamf80:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: fadd %st, %st(0)
; LINUXOSX64-NEXT: callq test_argParamf80
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = fadd x86_fp80 %a, %a
  %c = call x86_regcallcc double @test_argParamf80(x86_fp80 %b)
  %d = fadd double %c, %c
  ret double %d
}
; Test regcall when receiving/returning pointer.
; The pointer argument is converted to i32 with ptrtoint, incremented by 1 and
; converted back with inttoptr. Because the arithmetic is done on i32, the
; checks expect a 32-bit `incl %eax` in all three configurations, including
; the two 64-bit ones.
define dso_local x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a) {
; X32-LABEL: test_argRetPointer:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: incl %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  ret [4 x i32]* %d
}
; Test regcall when passing/retrieving pointer
; Increments the incoming pointer (via i32), passes it to test_argRetPointer
; using x86_regcallcc, then increments the returned pointer. The expected
; output increments EAX directly before and directly after the call, so no
; register moves are needed to marshal the argument or the result.
define dso_local x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) {
; X32-LABEL: test_CallargRetPointer:
; X32: # %bb.0:
; X32-NEXT: incl %eax
; X32-NEXT: calll _test_argRetPointer
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRetPointer:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rax
; WIN64-NEXT: .seh_stackalloc 8
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: incl %eax
; WIN64-NEXT: callq test_argRetPointer
; WIN64-NEXT: incl %eax
; WIN64-NEXT: popq %rcx
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRetPointer:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: callq test_argRetPointer
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = ptrtoint [4 x i32]* %a to i32
  %c = add i32 %b, 1
  %d = inttoptr i32 %c to [4 x i32]*
  %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
  %f = ptrtoint [4 x i32]* %e to i32
  %g = add i32 %f, 1
  %h = inttoptr i32 %g to [4 x i32]*
  ret [4 x i32]* %h
}
; Test regcall when receiving/returning 128 bit vector
; Selects between %a and %b under the <4 x i1> mask %x. In the expected
; output the mask arrives in xmm0 and is converted to a k-register
; (vpslld $31 + vpmovd2m), %a and %b arrive in xmm1 and xmm2, and the blended
; result is returned in xmm0.
define dso_local x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: test_argRet128Vector:
; X32: # %bb.0:
; X32-NEXT: vpslld $31, %xmm0, %xmm0
; X32-NEXT: vpmovd2m %xmm0, %k1
; X32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: vpslld $31, %xmm0, %xmm0
; WIN64-NEXT: vpmovd2m %xmm0, %k1
; WIN64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm0
; LINUXOSX64-NEXT: vpmovd2m %xmm0, %k1
; LINUXOSX64-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %d
}
; Test regcall when passing/retrieving 128 bit vector
; Calls test_argRet128Vector with %a supplied for both vector operands, then
; selects between %a and the call result under %x. Because %a and the mask
; are still needed after the call, the expected output keeps %a in a vector
; register that is saved in the prologue and restored in the epilogue (xmm4
; on the 32-bit target, xmm8 on both 64-bit targets) and spills/reloads the
; k1 mask around the call.
define dso_local x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i1> %x, <4 x i32> %a) {
; X32-LABEL: test_CallargRet128Vector:
; X32: # %bb.0:
; X32-NEXT: subl $44, %esp
; X32-NEXT: vmovups %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovdqa %xmm1, %xmm4
; X32-NEXT: vpslld $31, %xmm0, %xmm1
; X32-NEXT: vpmovd2m %xmm1, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %xmm4, %xmm1
; X32-NEXT: vmovdqa %xmm4, %xmm2
; X32-NEXT: calll _test_argRet128Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm4 # 16-byte Reload
; X32-NEXT: addl $44, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet128Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm %xmm8, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqa %xmm1, %xmm8
; WIN64-NEXT: vpslld $31, %xmm0, %xmm1
; WIN64-NEXT: vpmovd2m %xmm1, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %xmm8, %xmm1
; WIN64-NEXT: vmovdqa %xmm8, %xmm2
; WIN64-NEXT: callq test_argRet128Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet128Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $40, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
; LINUXOSX64-NEXT: vmovdqa %xmm1, %xmm8
; LINUXOSX64-NEXT: vpslld $31, %xmm0, %xmm1
; LINUXOSX64-NEXT: vpmovd2m %xmm1, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1
; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm2
; LINUXOSX64-NEXT: callq test_argRet128Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: addq $40, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i1> %x, <4 x i32> %a, <4 x i32> %a)
  %c = select <4 x i1> %x, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %c
}
; Test regcall when receiving/returning 256 bit vector
; Selects between %a and %b under the <8 x i1> mask %x. In the expected
; output the mask arrives in EAX and is moved to k1 with kmovd, %a and %b
; arrive in ymm0 and ymm1, and the blended result is returned in ymm0.
define dso_local x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %b) {
; X32-LABEL: test_argRet256Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %d
}
; Test regcall when passing/retrieving 256 bit vector
; Calls test_argRet256Vector with %a supplied for both vector operands, then
; selects between %a and the call result under %x. In the expected output
; both %a (ymm0) and the mask (k1, loaded from EAX) are spilled to the stack
; before the call and reloaded afterwards for the final masked move.
define dso_local x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i1> %x, <8 x i32> %a) {
; X32-LABEL: test_CallargRet256Vector:
; X32: # %bb.0:
; X32-NEXT: subl $36, %esp
; X32-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa %ymm0, %ymm1
; X32-NEXT: calll _test_argRet256Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %ymm1 # 32-byte Reload
; X32-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; X32-NEXT: addl $36, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet256Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $56, %rsp
; WIN64-NEXT: .seh_stackalloc 56
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa %ymm0, %ymm1
; WIN64-NEXT: callq test_argRet256Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; WIN64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; WIN64-NEXT: addq $56, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet256Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $56, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 64
; LINUXOSX64-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa %ymm0, %ymm1
; LINUXOSX64-NEXT: callq test_argRet256Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
; LINUXOSX64-NEXT: addq $56, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i1> %x, <8 x i32> %a, <8 x i32> %a)
  %c = select <8 x i1> %x, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %c
}
; Test regcall when receiving/returning 512 bit vector
; Selects between %a and %b under the <16 x i1> mask %x. In the expected
; output the mask arrives in EAX and is moved to k1 with kmovd, %a and %b
; arrive in zmm0 and zmm1, and the blended result is returned in zmm0.
define dso_local x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %b) {
; X32-LABEL: test_argRet512Vector:
; X32: # %bb.0:
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: retq
  %d = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %d
}
; Test regcall when passing/retrieving 512 bit vector
; Calls test_argRet512Vector with %a supplied for both vector operands, then
; selects between %a and the call result under %x. In the expected output
; both %a (zmm0) and the mask (k1, loaded from EAX) are spilled to the stack
; before the call and reloaded afterwards for the final masked move.
define dso_local x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i1> %x, <16 x i32> %a) {
; X32-LABEL: test_CallargRet512Vector:
; X32: # %bb.0:
; X32-NEXT: subl $68, %esp
; X32-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X32-NEXT: vmovdqa64 %zmm0, %zmm1
; X32-NEXT: calll _test_argRet512Vector
; X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 # 2-byte Reload
; X32-NEXT: vmovdqu64 {{[-0-9]+}}(%e{{[sb]}}p), %zmm1 # 64-byte Reload
; X32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; X32-NEXT: addl $68, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_CallargRet512Vector:
; WIN64: # %bb.0:
; WIN64-NEXT: subq $88, %rsp
; WIN64-NEXT: .seh_stackalloc 88
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; WIN64-NEXT: kmovd %eax, %k1
; WIN64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; WIN64-NEXT: vmovdqa64 %zmm0, %zmm1
; WIN64-NEXT: callq test_argRet512Vector
; WIN64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; WIN64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; WIN64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; WIN64-NEXT: addq $88, %rsp
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_CallargRet512Vector:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: subq $88, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 96
; LINUXOSX64-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; LINUXOSX64-NEXT: kmovd %eax, %k1
; LINUXOSX64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; LINUXOSX64-NEXT: vmovdqa64 %zmm0, %zmm1
; LINUXOSX64-NEXT: callq test_argRet512Vector
; LINUXOSX64-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; LINUXOSX64-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm1 # 64-byte Reload
; LINUXOSX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; LINUXOSX64-NEXT: addq $88, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i1> %x, <16 x i32> %a, <16 x i32> %a)
  %c = select <16 x i1> %x, <16 x i32> %a, <16 x i32> %b
  ret <16 x i32> %c
}
; Test regcall when running multiple input parameters - callee saved xmms
; Computes (a + b) - (a * b) + c on <32 x float> values; each argument
; occupies two zmm registers (zmm0-zmm5 in the expected output) and the
; result is returned in zmm0/zmm1. zmm6 and zmm7 serve as temporaries: the
; 32-bit target is expected to spill and reload xmm6/xmm7 around their use,
; while both 64-bit targets use them without saving.
define dso_local x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
; X32-LABEL: testf32_inp:
; X32: # %bb.0:
; X32-NEXT: subl $44, %esp
; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, (%esp) # 16-byte Spill
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm6
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm7
; X32-NEXT: vmulps %zmm2, %zmm0, %zmm0
; X32-NEXT: vsubps %zmm0, %zmm6, %zmm0
; X32-NEXT: vmulps %zmm3, %zmm1, %zmm1
; X32-NEXT: vsubps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm4, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm5, %zmm1, %zmm1
; X32-NEXT: vmovups (%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $44, %esp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; WIN64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm6
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm7
; LINUXOSX64-NEXT: vmulps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vsubps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vmulps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vsubps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm4, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm5, %zmm1, %zmm1
; LINUXOSX64-NEXT: retq
  %x1 = fadd <32 x float> %a, %b
  %x2 = fmul <32 x float> %a, %b
  %x3 = fsub <32 x float> %x1, %x2
  %x4 = fadd <32 x float> %x3, %c
  ret <32 x float> %x4
}
; Test regcall when running multiple input parameters - callee saved GPRs
; Takes twelve i32 arguments (a1..a6, b1..b6) and returns
;   (a1-a2)*(b1-b2) + (a3-a4)*(b3-b4) + (a5-a6)*(b5-b6)
; + (a1+a2)*(b1+b2) + (a3+a4)*(b3+b4) + (a5+a6)*(b5+b6).
; Twelve integer arguments exceed the scratch registers available, so the
; expected output saves and restores GPRs around the body: ebp/ebx on the
; 32-bit target (which also reads several arguments from the stack),
; r13/rbp/rbx on the Windows 64-bit target, and rbp/rbx on the Linux 64-bit
; target (which reads one argument from 24(%rsp)).
define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
; X32-LABEL: testi32_inp:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: subl $12, %esp
; X32-NEXT: movl %esi, (%esp) # 4-byte Spill
; X32-NEXT: movl %edi, %esi
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, %edi
; X32-NEXT: leal (%edx,%esi), %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: subl %esi, %ebx
; X32-NEXT: movl %edi, %eax
; X32-NEXT: subl %ecx, %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl %ebp, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull %ebx, %eax
; X32-NEXT: addl %ecx, %eax
; X32-NEXT: movl (%esp), %ebx # 4-byte Reload
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %ebx, %ecx
; X32-NEXT: addl %eax, %ecx
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: imull %ebp, %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %esi
; X32-NEXT: imull {{[0-9]+}}(%esp), %esi
; X32-NEXT: addl %esi, %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
; X32-NEXT: imull %eax, %edx
; X32-NEXT: addl %edx, %edi
; X32-NEXT: addl %ecx, %edi
; X32-NEXT: movl %edi, %eax
; X32-NEXT: addl $12, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testi32_inp:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r13
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: pushq %rbx
; WIN64-NEXT: # kill: def $edx killed $edx def $rdx
; WIN64-NEXT: movl %ecx, %ebx
; WIN64-NEXT: # kill: def $esi killed $esi def $rsi
; WIN64-NEXT: # kill: def $r15d killed $r15d def $r15
; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14
; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12
; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11
; WIN64-NEXT: # kill: def $r10d killed $r10d def $r10
; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9
; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8
; WIN64-NEXT: # kill: def $edi killed $edi def $rdi
; WIN64-NEXT: leal (%rdx,%rdi), %r13d
; WIN64-NEXT: # kill: def $edx killed $edx killed $rdx
; WIN64-NEXT: subl %edi, %edx
; WIN64-NEXT: leal (%rsi,%r8), %ecx
; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi
; WIN64-NEXT: subl %r8d, %esi
; WIN64-NEXT: leal (%r9,%r10), %r8d
; WIN64-NEXT: movl %r9d, %ebp
; WIN64-NEXT: subl %r10d, %ebp
; WIN64-NEXT: movl %eax, %edi
; WIN64-NEXT: movl %ebx, %r9d
; WIN64-NEXT: subl %ebx, %edi
; WIN64-NEXT: imull %edi, %ebp
; WIN64-NEXT: leal (%r11,%r12), %edi
; WIN64-NEXT: movl %r11d, %ebx
; WIN64-NEXT: subl %r12d, %ebx
; WIN64-NEXT: imull %edx, %ebx
; WIN64-NEXT: addl %ebp, %ebx
; WIN64-NEXT: leal (%r14,%r15), %edx
; WIN64-NEXT: movl %r14d, %ebp
; WIN64-NEXT: subl %r15d, %ebp
; WIN64-NEXT: imull %esi, %ebp
; WIN64-NEXT: addl %ebx, %ebp
; WIN64-NEXT: addl %r9d, %eax
; WIN64-NEXT: imull %r8d, %eax
; WIN64-NEXT: imull %r13d, %edi
; WIN64-NEXT: addl %edi, %eax
; WIN64-NEXT: imull %ecx, %edx
; WIN64-NEXT: addl %edx, %eax
; WIN64-NEXT: addl %ebp, %eax
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: popq %r13
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testi32_inp:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: pushq %rbx
; LINUXOSX64-NEXT: # kill: def $edx killed $edx def $rdx
; LINUXOSX64-NEXT: # kill: def $esi killed $esi def $rsi
; LINUXOSX64-NEXT: # kill: def $r14d killed $r14d def $r14
; LINUXOSX64-NEXT: # kill: def $r13d killed $r13d def $r13
; LINUXOSX64-NEXT: # kill: def $r12d killed $r12d def $r12
; LINUXOSX64-NEXT: # kill: def $r9d killed $r9d def $r9
; LINUXOSX64-NEXT: # kill: def $r8d killed $r8d def $r8
; LINUXOSX64-NEXT: # kill: def $edi killed $edi def $rdi
; LINUXOSX64-NEXT: leal (%rdx,%rdi), %r10d
; LINUXOSX64-NEXT: movl %edx, %ebp
; LINUXOSX64-NEXT: subl %edi, %ebp
; LINUXOSX64-NEXT: leal (%rsi,%r8), %r11d
; LINUXOSX64-NEXT: # kill: def $esi killed $esi killed $rsi
; LINUXOSX64-NEXT: subl %r8d, %esi
; LINUXOSX64-NEXT: leal (%r9,%r12), %r8d
; LINUXOSX64-NEXT: movl %r9d, %edi
; LINUXOSX64-NEXT: subl %r12d, %edi
; LINUXOSX64-NEXT: movl %eax, %edx
; LINUXOSX64-NEXT: subl %ecx, %edx
; LINUXOSX64-NEXT: imull %edx, %edi
; LINUXOSX64-NEXT: leal (%r13,%r14), %edx
; LINUXOSX64-NEXT: movl %r13d, %ebx
; LINUXOSX64-NEXT: subl %r14d, %ebx
; LINUXOSX64-NEXT: imull %ebp, %ebx
; LINUXOSX64-NEXT: movl 24(%rsp), %ebp
; LINUXOSX64-NEXT: addl %edi, %ebx
; LINUXOSX64-NEXT: movl %r15d, %edi
; LINUXOSX64-NEXT: subl %ebp, %edi
; LINUXOSX64-NEXT: imull %esi, %edi
; LINUXOSX64-NEXT: addl %ebx, %edi
; LINUXOSX64-NEXT: addl %ecx, %eax
; LINUXOSX64-NEXT: imull %r8d, %eax
; LINUXOSX64-NEXT: imull %r10d, %edx
; LINUXOSX64-NEXT: addl %edx, %eax
; LINUXOSX64-NEXT: addl %r15d, %ebp
; LINUXOSX64-NEXT: imull %r11d, %ebp
; LINUXOSX64-NEXT: addl %ebp, %eax
; LINUXOSX64-NEXT: addl %edi, %eax
; LINUXOSX64-NEXT: popq %rbx
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
  %x1 = sub i32 %a1, %a2
  %x2 = sub i32 %a3, %a4
  %x3 = sub i32 %a5, %a6
  %y1 = sub i32 %b1, %b2
  %y2 = sub i32 %b3, %b4
  %y3 = sub i32 %b5, %b6
  %v1 = add i32 %a1, %a2
  %v2 = add i32 %a3, %a4
  %v3 = add i32 %a5, %a6
  %w1 = add i32 %b1, %b2
  %w2 = add i32 %b3, %b4
  %w3 = add i32 %b5, %b6
  %s1 = mul i32 %x1, %y1
  %s2 = mul i32 %x2, %y2
  %s3 = mul i32 %x3, %y3
  %t1 = mul i32 %v1, %w1
  %t2 = mul i32 %v2, %w2
  %t3 = mul i32 %v3, %w3
  %m1 = add i32 %s1, %s2
  %m2 = add i32 %m1, %s3
  %n1 = add i32 %t1, %t2
  %n2 = add i32 %n1, %t3
  %r1 = add i32 %m2, %n2
  ret i32 %r1
}
; Test that parameters, overflowing register capacity, are passed through the stack
; Sums nine <32 x float> arguments, each of which needs two zmm registers.
; In the expected output the 32-bit target takes the first four arguments in
; zmm0-zmm7 and reads the remaining five from the stack relative to EBP; the
; 64-bit targets take the first eight in zmm0-zmm15 and read only the ninth
; from the stack (16(%rbp) and 80(%rbp)). All targets set up a frame pointer
; and realign the stack to 64 bytes.
define dso_local x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
; X32-LABEL: testf32_stack:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vaddps %zmm3, %zmm1, %zmm1
; X32-NEXT: vaddps %zmm2, %zmm0, %zmm0
; X32-NEXT: vaddps %zmm0, %zmm4, %zmm0
; X32-NEXT: vaddps %zmm1, %zmm5, %zmm1
; X32-NEXT: vaddps %zmm1, %zmm7, %zmm1
; X32-NEXT: vaddps %zmm0, %zmm6, %zmm0
; X32-NEXT: vaddps 8(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 72(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 200(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 136(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 264(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 328(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 456(%ebp), %zmm1, %zmm1
; X32-NEXT: vaddps 392(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 520(%ebp), %zmm0, %zmm0
; X32-NEXT: vaddps 584(%ebp), %zmm1, %zmm1
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; WIN64-LABEL: testf32_stack:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %rbp
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: andq $-64, %rsp
; WIN64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; WIN64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; WIN64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; WIN64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; WIN64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; WIN64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; WIN64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; WIN64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; WIN64-NEXT: movq %rbp, %rsp
; WIN64-NEXT: popq %rbp
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: testf32_stack:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rbp
; LINUXOSX64-NEXT: movq %rsp, %rbp
; LINUXOSX64-NEXT: andq $-64, %rsp
; LINUXOSX64-NEXT: subq $64, %rsp
; LINUXOSX64-NEXT: vaddps %zmm3, %zmm1, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm2, %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm4, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm5, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm7, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm6, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm8, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm9, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm11, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm10, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm12, %zmm0
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm13, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm1, %zmm15, %zmm1
; LINUXOSX64-NEXT: vaddps %zmm0, %zmm14, %zmm0
; LINUXOSX64-NEXT: vaddps 16(%rbp), %zmm0, %zmm0
; LINUXOSX64-NEXT: vaddps 80(%rbp), %zmm1, %zmm1
; LINUXOSX64-NEXT: movq %rbp, %rsp
; LINUXOSX64-NEXT: popq %rbp
; LINUXOSX64-NEXT: retq
  %x1 = fadd <32 x float> %a0, %b0
  %x2 = fadd <32 x float> %c0, %x1
  %x3 = fadd <32 x float> %a1, %x2
  %x4 = fadd <32 x float> %b1, %x3
  %x5 = fadd <32 x float> %c1, %x4
  %x6 = fadd <32 x float> %a2, %x5
  %x7 = fadd <32 x float> %b2, %x6
  %x8 = fadd <32 x float> %c2, %x7
  ret <32 x float> %x8
}
; Test regcall when passing/retrieving mixed types
; The seven unnamed arguments are, in order:
;   %0 double, %1 float, %2 i8 signext, %3 i32, %4 i64, %5 i16 signext,
;   %6 pointer to i32.
; Every argument (and the i32 loaded through %6) is converted to double, the
; values are accumulated with fadd, and the sum is truncated to i32 for the
; return value.
; As the expected output below shows, the i386 run receives the i64 split
; across edx/edi and fetches the pointer argument from the stack into ebx,
; whereas both x86_64 runs receive the i64 in rdx and the pointer in rsi.
define dso_local x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
; X32-LABEL: test_argRetMixTypes:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X32-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %edx, %xmm1
; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; X32-NEXT: vcvtqq2pd %ymm1, %ymm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %esi, %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvttsd2si %xmm0, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argRetMixTypes:
; WIN64: # %bb.0:
; WIN64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; WIN64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; WIN64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvttsd2si %xmm0, %eax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argRetMixTypes:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax
; LINUXOSX64-NEXT: retq
%8 = fpext float %1 to double
%9 = fadd double %8, %0
%10 = sitofp i8 %2 to double
%11 = fadd double %9, %10
%12 = sitofp i32 %3 to double
%13 = fadd double %11, %12
%14 = sitofp i64 %4 to double
%15 = fadd double %13, %14
%16 = sitofp i16 %5 to double
%17 = fadd double %15, %16
%18 = load i32, i32* %6, align 4
%19 = sitofp i32 %18 to double
%20 = fadd double %17, %19
%21 = fptosi double %20 to i32
ret i32 %21
}
; Mixed-type aggregate (float, double, i32, i8, i64) used as the by-value
; return type of @test_argMultiRet.
%struct.complex = type { float, double, i32, i8, i64}
; Test regcall when returning a multi-field struct by value.
; The returned %struct.complex is built as follows:
;   field 0 (float)  = argument %0, passed through unchanged
;   field 1 (double) = argument %1 + 5.0
;   field 2 (i32)    = constant 4
;   field 3 (i8)     = constant 7
;   field 4 (i64)    = constant 999
; Arguments %2, %3 and %4 are not read.
; As the expected output below shows, the double result is produced in xmm1
; and the integer fields in eax, cl and edx (the i386 run additionally zeroes
; edi for the upper half of the i64); no instruction is emitted for the float
; field.
define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
; X32-LABEL: test_argMultiRet:
; X32: # %bb.0:
; X32-NEXT: vaddsd __real@4014000000000000, %xmm1, %xmm1
; X32-NEXT: movl $4, %eax
; X32-NEXT: movb $7, %cl
; X32-NEXT: movl $999, %edx # imm = 0x3E7
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: retl
;
; WIN64-LABEL: test_argMultiRet:
; WIN64: # %bb.0:
; WIN64-NEXT: vaddsd __real@4014000000000000(%rip), %xmm1, %xmm1
; WIN64-NEXT: movl $999, %edx # imm = 0x3E7
; WIN64-NEXT: movl $4, %eax
; WIN64-NEXT: movb $7, %cl
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argMultiRet:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7
; LINUXOSX64-NEXT: movl $4, %eax
; LINUXOSX64-NEXT: movb $7, %cl
; LINUXOSX64-NEXT: retq
%6 = fadd double %1, 5.000000e+00
%7 = insertvalue %struct.complex undef, float %0, 0
%8 = insertvalue %struct.complex %7, double %6, 1
%9 = insertvalue %struct.complex %8, i32 4, 2
%10 = insertvalue %struct.complex %9, i8 7, 3
%11 = insertvalue %struct.complex %10, i64 999, 4
ret %struct.complex %11
}