llvm-project/llvm/test/CodeGen/X86/hipe-cc64.ll

; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -relocation-model=static -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s

; Check the HiPE calling convention works (x86-64)

; zap: a caller using the default calling convention that invokes @addfour
; through calling convention 11 (the HiPE convention this file exercises).
; The first two cc 11 arguments are passed as undef; %a, %b and the constants
; 8 and 9 follow. The expected sequence moves the incoming %rsi into %rdx and
; the incoming %rdi into %rsi, and materializes 8 and 9 in %ecx and %r8d
; before the call.
define void @zap(i64 %a, i64 %b) nounwind {
entry:
  ; CHECK:      movq %rsi, %rdx
  ; CHECK-NEXT: movl $8, %ecx
  ; CHECK-NEXT: movl $9, %r8d
  ; CHECK-NEXT: movq %rdi, %rsi
  ; CHECK-NEXT: callq addfour
  %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
  ; Only element 2 of the returned aggregate is used; the expected
  ; "movq %rax, %r9" below shows it being taken from %rax.
  %res = extractvalue {i64, i64, i64} %0, 2

  ; CHECK:      movl $1, %edx
  ; CHECK-NEXT: movl $2, %ecx
  ; CHECK-NEXT: movl $3, %r8d
  ; CHECK-NEXT: movq %rax, %r9
  ; CHECK:      callq foo
  ; NOTE(review): this call site carries no "cc 11" although @foo is defined
  ; with cc 11, and a callq (not a jmp) is expected even though the call is
  ; marked "tail" - confirm that the convention mismatch is intentional.
  tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
  ret void
}

; addfour: cc 11 callee that returns %x + %y + %z + %w in element 2 of a
; three-element aggregate; elements 0 and 1 are left undef. %hp and %p are
; not used by the body. The expected sequence forms the sum in %rax from
; %rsi, %rdx, %rcx and %r8.
define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
entry:
  ; CHECK:      leaq (%rsi,%rdx), %rax
  ; CHECK-NEXT: addq %rcx, %rax
  ; CHECK-NEXT: addq %r8, %rax
  %0 = add i64 %x, %y
  %1 = add i64 %0, %z
  %2 = add i64 %1, %w

  ; CHECK:      ret
  ; Only index 2 of the result is populated.
  %res = insertvalue {i64, i64, i64} undef, i64 %2, 2
  ret {i64, i64, i64} %res
}

; foo: cc 11 function that stores each of its six parameters to its own
; stack slot, reloads them, and forwards them unchanged to @bar with a cc 11
; tail call. The expected stores write %r15, %rbp, %rsi, %rdx, %rcx and %r8
; to descending offsets from %rsp, and the call is expected to be emitted as
; "jmp bar".
define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
entry:
  ; CHECK:      movq  %r15, 40(%rsp)
  ; CHECK-NEXT: movq  %rbp, 32(%rsp)
  ; CHECK-NEXT: movq  %rsi, 24(%rsp)
  ; CHECK-NEXT: movq  %rdx, 16(%rsp)
  ; CHECK-NEXT: movq  %rcx, 8(%rsp)
  ; CHECK-NEXT: movq  %r8, (%rsp)
  ; One stack slot per parameter, allocated in parameter order.
  %hp_var   = alloca i64
  %p_var    = alloca i64
  %arg0_var = alloca i64
  %arg1_var = alloca i64
  %arg2_var = alloca i64
  %arg3_var = alloca i64
  store i64 %hp, i64* %hp_var
  store i64 %p, i64* %p_var
  store i64 %arg0, i64* %arg0_var
  store i64 %arg1, i64* %arg1_var
  store i64 %arg2, i64* %arg2_var
  store i64 %arg3, i64* %arg3_var

  ; The loads read back the values just written from the corresponding
  ; registers and are therefore no-ops.
  %0 = load i64, i64* %hp_var
  %1 = load i64, i64* %p_var
  %2 = load i64, i64* %arg0_var
  %3 = load i64, i64* %arg1_var
  %4 = load i64, i64* %arg2_var
  %5 = load i64, i64* %arg3_var
  ; CHECK:      jmp bar
  tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
  ret void
}

; baz: cc 11 function that performs an indirect cc 11 tail call. The i64
; stored in @clos is reinterpreted as a pointer to a function of type
; void (i64, i64, i64) and called with (undef, undef, 42). The expected
; output places 42 in %esi and emits an indirect jmpq rather than a call.
define cc 11 void @baz() nounwind {
  %tmp_clos = load i64, i64* @clos
  ; Integer -> pointer -> function pointer.
  %tmp_clos2 = inttoptr i64 %tmp_clos to i64*
  %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
  ; CHECK:      movl $42, %esi
  ; CHECK-NEXT: jmpq *(%rax)
  tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind
  ret void
}

; Sanity-check the tail call sequence. The number of arguments was chosen so
; as to expose a bug where the tail call sequence clobbered the stack.
;
; tailcaller forwards %hp and %p plus five constants (15, 31, 47, 63, 79) to
; @tailcallee and returns its result unchanged. The expected sequence loads
; the first four constants into %esi, %edx, %ecx and %r8d, pops into %rax,
; and then jumps to tailcallee.
; NOTE(review): the fifth constant (79) does not appear in the expected
; sequence; presumably it is passed on the stack - confirm.
; NOTE(review): attribute groups #0 and #1 are referenced here but are not
; defined in the visible part of this file - confirm that this is intended.
define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 {
  ; CHECK:      movl	$15, %esi
  ; CHECK-NEXT: movl	$31, %edx
  ; CHECK-NEXT: movl	$47, %ecx
  ; CHECK-NEXT: movl	$63, %r8d
  ; CHECK-NEXT: popq	%rax
  ; CHECK-NEXT: .cfi_def_cfa_offset 16
  ; CHECK-NEXT: jmp	tailcallee
  %ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15,
     i64 31, i64 47, i64 63, i64 79) #1
  ret { i64, i64, i64 } %ret
}

; Module-level named metadata "hipe.literals": three (name, i32 value) pairs.
; NOTE(review): presumably these are the constants the X86 HiPE prologue
; lowering reads instead of hard-coded runtime offsets - confirm against the
; backend.
!hipe.literals = !{ !0, !1, !2 }
!0 = !{ !"P_NSP_LIMIT", i32 160 }
!1 = !{ !"X86_LEAF_WORDS", i32 24 }
!2 = !{ !"AMD64_LEAF_WORDS", i32 24 }
; External i64 constant loaded by @baz to obtain its indirect call target.
@clos = external constant i64
; cc 11 callees referenced by @foo and @tailcaller; no bodies are provided in
; this file.
declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)
declare cc 11 { i64, i64, i64 } @tailcallee(i64, i64, i64, i64, i64, i64, i64)
Re-land r335297 "[X86] Implement more of x86-64 large and medium PIC code models" Don't try to generate large PIC code for non-ELF targets. Neither COFF nor MachO have relocations for large position independent code, and users have been using "large PIC" code models to JIT 64-bit code for a while now. With this change, if they are generating ELF code, their JITed code will truly be PIC, but if they target MachO or COFF, it will contain 64-bit immediates that directly reference external symbols. For a JIT, that's perfectly fine. llvm-svn: 337740 2018-07-24 05:14:35 +08:00			`; RUN: llc < %s -stack-symbol-ordering=0 -tailcallopt -relocation-model=static -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron \| FileCheck %s`
Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. llvm-svn: 168166 2012-11-16 20:36:39 +08:00
			`; Check the HiPE calling convention works (x86-64)`

			`define void @zap(i64 %a, i64 %b) nounwind {`
			`entry:`
[X86] Handle COPYs of physregs better (regalloc hints) Enable enableMultipleCopyHints() on X86. Original Patch by @jonpa: While enabling the mischeduler for SystemZ, it was discovered that for some reason a test needed one extra seemingly needless COPY (test/CodeGen/SystemZ/call-03.ll). The handling for that resulted in this patch, which improves the register coalescing by providing not just one copy hint, but a sorted list of copy hints. On SystemZ, this gives ~12500 fewer register moves on SPEC, as well as marginally less spilling. Instead of improving just the SystemZ backend, the improvement has been implemented in common-code (calculateSpillWeightAndHint()). This gives a lot of test failures, but since this should be a general improvement I hope that the involved targets will help and review the test updates. Differential Revision: https://reviews.llvm.org/D38128 llvm-svn: 342578 2018-09-20 02:59:08 +08:00			`; CHECK: movq %rsi, %rdx`
Revert "Temporarily enable MI-Sched on X86." This reverts commit 98a9b72e8c56dc13a2617de84503a3d78352789c. llvm-svn: 184823 2013-06-25 10:48:58 +08:00			`; CHECK-NEXT: movl $8, %ecx`
			`; CHECK-NEXT: movl $9, %r8d`
Enable MI Sched for x86. This changes the SelectionDAG scheduling preference to source order. Soon, the SelectionDAG scheduler can be bypassed saving a nice chunk of compile time. Performance differences that result from this change are often a consequence of register coalescing. The register coalescer is far from perfect. Bugs can be filed for deficiencies. On x86 SandyBridge/Haswell, the source order schedule is often preserved, particularly for small blocks. Register pressure is generally improved over the SD scheduler's ILP mode. However, we are still able to handle large blocks that require latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to discover the critical path in single-block loops and adjust heuristics accordingly. The MI scheduler relies on the new machine model. This is currently unimplemented for AVX, so we may not be generating the best code yet. Unit tests are updated so they don't depend on SD scheduling heuristics. llvm-svn: 192750 2013-10-16 07:33:07 +08:00			`; CHECK-NEXT: movq %rdi, %rsi`
Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. llvm-svn: 168166 2012-11-16 20:36:39 +08:00			`; CHECK-NEXT: callq addfour`
			`%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)`
			`%res = extractvalue {i64, i64, i64} %0, 2`

			`; CHECK: movl $1, %edx`
			`; CHECK-NEXT: movl $2, %ecx`
			`; CHECK-NEXT: movl $3, %r8d`
			`; CHECK-NEXT: movq %rax, %r9`
			`; CHECK: callq foo`
			`tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind`
			`ret void`
			`}`

			`define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {`
			`entry:`
			`; CHECK: leaq (%rsi,%rdx), %rax`
			`; CHECK-NEXT: addq %rcx, %rax`
			`; CHECK-NEXT: addq %r8, %rax`
			`%0 = add i64 %x, %y`
			`%1 = add i64 %0, %z`
			`%2 = add i64 %1, %w`

			`; CHECK: ret`
			`%res = insertvalue {i64, i64, i64} undef, i64 %2, 2`
			`ret {i64, i64, i64} %res`
			`}`

			`define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {`
			`entry:`
			`; CHECK: movq %r15, 40(%rsp)`
			`; CHECK-NEXT: movq %rbp, 32(%rsp)`
			`; CHECK-NEXT: movq %rsi, 24(%rsp)`
			`; CHECK-NEXT: movq %rdx, 16(%rsp)`
			`; CHECK-NEXT: movq %rcx, 8(%rsp)`
			`; CHECK-NEXT: movq %r8, (%rsp)`
			`%hp_var = alloca i64`
			`%p_var = alloca i64`
			`%arg0_var = alloca i64`
			`%arg1_var = alloca i64`
			`%arg2_var = alloca i64`
			`%arg3_var = alloca i64`
			`store i64 %hp, i64* %hp_var`
			`store i64 %p, i64* %p_var`
			`store i64 %arg0, i64* %arg0_var`
			`store i64 %arg1, i64* %arg1_var`
			`store i64 %arg2, i64* %arg2_var`
			`store i64 %arg3, i64* %arg3_var`

[DAG] Improve Aliasing of operations to static alloca Re-recommitting after landing DAG extension-crash fix. Recommitting after adding check to avoid miscomputing alias information on addresses of the same base but different subindices. Memory accesses offset from frame indices may alias, e.g., we may merge write from function arguments passed on the stack when they are contiguous. As a result, when checking aliasing, we consider the underlying frame index's offset from the stack pointer. Static allocs are realized as stack objects in SelectionDAG, but its offset is not set until post-DAG causing DAGCombiner's alias check to consider access to static allocas to frequently alias. Modify isAlias to consider access between static allocas and access from other frame objects to be considered aliasing. Many test changes are included here. Most are fixes for tests which indirectly relied on our aliasing ability and needed to be modified to preserve their original intent. The remaining tests have minor improvements due to relaxed ordering. The exception is CodeGen/X86/2011-10-19-widen_vselect.ll which has a minor degradation even though the pre-legalized DAG is improved. Reviewers: rnk, mkuper, jonpa, hfinkel, uweigand Reviewed By: rnk Subscribers: sdardis, nemanjai, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D33345 llvm-svn: 308350 2017-07-19 04:06:24 +08:00			`; Loads are reading values just written from corresponding register and are therefore noops.`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%0 = load i64, i64* %hp_var`
			`%1 = load i64, i64* %p_var`
			`%2 = load i64, i64* %arg0_var`
			`%3 = load i64, i64* %arg1_var`
			`%4 = load i64, i64* %arg2_var`
			`%5 = load i64, i64* %arg3_var`
Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. llvm-svn: 168166 2012-11-16 20:36:39 +08:00			`; CHECK: jmp bar`
			`tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind`
			`ret void`
			`}`

			`define cc 11 void @baz() nounwind {`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%tmp_clos = load i64, i64* @clos`
Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. llvm-svn: 168166 2012-11-16 20:36:39 +08:00			`%tmp_clos2 = inttoptr i64 %tmp_clos to i64*`
			`%indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*`
			`; CHECK: movl $42, %esi`
			`; CHECK-NEXT: jmpq *(%rax)`
			`tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind`
			`ret void`
			`}`

[X86] Fix tailcall return address clobber bug. This bug (llvm.org/PR28124) was introduced by r237977, which refactored the tail call sequence to be generated in two passes instead of one. Unfortunately, the stack adjustment produced by the first pass was not recognized by X86FrameLowering::mergeSPUpdates() in all cases, causing code such as the following, which clobbers the return address, to be generated: popl %edi popl %edi pushl %eax jmp tailcallee # TAILCALL To fix the problem, the entire stack adjustment is performed in X86ExpandPseudo::ExpandMI() for tail calls. Patch by Magnus Lång <margnus1@gmail.com> Differential Revision: http://reviews.llvm.org/D21325 llvm-svn: 275103 2016-07-12 05:03:03 +08:00			`; Sanity-check the tail call sequence. Number of arguments was chosen as to`
			`; expose a bug where the tail call sequence clobbered the stack.`
			`define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 {`
			`; CHECK: movl $15, %esi`
			`; CHECK-NEXT: movl $31, %edx`
			`; CHECK-NEXT: movl $47, %ecx`
			`; CHECK-NEXT: movl $63, %r8d`
			`; CHECK-NEXT: popq %rax`
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706 2018-04-24 18:32:08 +08:00			`; CHECK-NEXT: .cfi_def_cfa_offset 16`
[X86] Fix tailcall return address clobber bug. This bug (llvm.org/PR28124) was introduced by r237977, which refactored the tail call sequence to be generated in two passes instead of one. Unfortunately, the stack adjustment produced by the first pass was not recognized by X86FrameLowering::mergeSPUpdates() in all cases, causing code such as the following, which clobbers the return address, to be generated: popl %edi popl %edi pushl %eax jmp tailcallee # TAILCALL To fix the problem, the entire stack adjustment is performed in X86ExpandPseudo::ExpandMI() for tail calls. Patch by Magnus Lång <margnus1@gmail.com> Differential Revision: http://reviews.llvm.org/D21325 llvm-svn: 275103 2016-07-12 05:03:03 +08:00			`; CHECK-NEXT: jmp tailcallee`
			`%ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15,`
			`i64 31, i64 47, i64 63, i64 79) #1`
			`ret { i64, i64, i64 } %ret`
			`}`

[X86] Extract HiPE prologue constants into metadata X86FrameLowering::adjustForHiPEPrologue() contains a hard-coded offset into an Erlang Runtime System-internal data structure (the PCB). As the layout of this data structure is prone to change, this poses problems for maintaining compatibility. To address this problem, the compiler can produce this information as module-level named metadata. For example (where P_NSP_LIMIT is the offending offset): !hipe.literals = !{ !2, !3, !4 } !2 = !{ !"P_NSP_LIMIT", i32 152 } !3 = !{ !"X86_LEAF_WORDS", i32 24 } !4 = !{ !"AMD64_LEAF_WORDS", i32 24 } Patch by Magnus Lang Differential Revision: http://reviews.llvm.org/D20363 llvm-svn: 273593 2016-06-24 02:17:25 +08:00			`!hipe.literals = !{ !0, !1, !2 }`
			`!0 = !{ !"P_NSP_LIMIT", i32 160 }`
			`!1 = !{ !"X86_LEAF_WORDS", i32 24 }`
			`!2 = !{ !"AMD64_LEAF_WORDS", i32 24 }`
Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. llvm-svn: 168166 2012-11-16 20:36:39 +08:00			`@clos = external constant i64`
			`declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)`
[X86] Fix tailcall return address clobber bug. This bug (llvm.org/PR28124) was introduced by r237977, which refactored the tail call sequence to be generated in two passes instead of one. Unfortunately, the stack adjustment produced by the first pass was not recognized by X86FrameLowering::mergeSPUpdates() in all cases, causing code such as the following, which clobbers the return address, to be generated: popl %edi popl %edi pushl %eax jmp tailcallee # TAILCALL To fix the problem, the entire stack adjustment is performed in X86ExpandPseudo::ExpandMI() for tail calls. Patch by Magnus Lång <margnus1@gmail.com> Differential Revision: http://reviews.llvm.org/D21325 llvm-svn: 275103 2016-07-12 05:03:03 +08:00			`declare cc 11 { i64, i64, i64 } @tailcallee(i64, i64, i64, i64, i64, i64, i64)`