; This test was added for the following fix:
;
; Arguments spilled on the stack before a function call may have
; alignment requirements, for example in the case of vectors.
; These requirements are exploited by the code generator by using
; move instructions that have similar alignment requirements, e.g.,
; movaps on x86.
; Although the code generator properly aligns the arguments with
; respect to the displacement of the stack pointer it computes,
; the displacement itself may cause misalignment. For example if
; we have
;   %3 = load <16 x float>, <16 x float>* %1, align 64
;   call void @bar(<16 x float> %3, i32 0)
; the x86 back-end emits:
;   movaps 32(%ecx), %xmm2
;   movaps (%ecx), %xmm0
;   movaps 16(%ecx), %xmm1
;   movaps 48(%ecx), %xmm3
;   subl $20, %esp       <-- if %esp was 16-byte aligned before this instruction, it no longer will be afterwards
;   movaps %xmm3, (%esp) <-- movaps requires 16-byte alignment, while %esp is not aligned as such.
;   movl $0, 16(%esp)
;   calll __bar
; To solve this, we need to make sure that the computed value with which
; the stack pointer is changed is a multiple of the maximal alignment seen
; during its computation. With this change we get proper alignment:
;   subl $32, %esp
;   movaps %xmm3, (%esp)
;
; Differential Revision: http://reviews.llvm.org/D12337
; llvm-svn: 248786
; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s

; Check proper alignment of spilled vector
; CHECK-LABEL: spill_ok
; CHECK: subl $32, %esp
; CHECK: movaps %xmm3, (%esp)
; CHECK: movl $0, 16(%esp)
; CHECK: calll _bar
define void @spill_ok(i32, <16 x float> *) {
|
|
|
|
entry:
|
|
|
|
%2 = alloca i32, i32 %0
|
|
|
|
%3 = load <16 x float>, <16 x float> * %1, align 64
|
|
|
|
tail call void @bar(<16 x float> %3, i32 0) nounwind
|
|
|
|
ret void
|
|
|
|
}
declare void @bar(<16 x float> %a, i32 %b)

; Check that proper alignment of spilled vector does not affect vargs

; CHECK-LABEL: vargs_not_affected
; NOTE: The CHECK line below was updated by the following change:
;
; Elide stores which are overwritten without being observed.
;
; Summary:
; In SelectionDAG, when a store is immediately chained to another store
; to the same address, elide the first store as it has no observable
; effects. This causes small improvements dealing with intrinsics
; lowered to stores.
;
; Test notes:
; * Many testcases overwrite store addresses multiple times and needed
;   minor changes, mainly making stores volatile to prevent the
;   optimization from optimizing the test away.
; * Many X86 test cases optimized out instructions associated with
;   va_start.
; * Note that test_splat in CodeGen/AArch64/misched-stp.ll no longer has
;   dependencies to check and can probably be removed and potentially
;   replaced with another test.
;
; Differential Revision: https://reviews.llvm.org/D33206
; llvm-svn: 303198
; CHECK: movl 28(%ebp), %eax
; (A verbatim duplicate of the stack-realignment commit message appeared
; here; it repeats the commentary at the top of this file:
; http://reviews.llvm.org/D12337, llvm-svn 248786.)

define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
|
|
|
|
entry:
|
|
|
|
%ap = alloca i8*, align 4
|
|
|
|
%0 = bitcast i8** %ap to i8*
|
|
|
|
call void @llvm.va_start(i8* %0)
|
|
|
|
%argp.cur = load i8*, i8** %ap, align 4
|
|
|
|
%argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
|
|
|
|
store i8* %argp.next, i8** %ap, align 4
|
|
|
|
%1 = bitcast i8* %argp.cur to i32*
|
|
|
|
%2 = load i32, i32* %1, align 4
|
|
|
|
call void @llvm.va_end(i8* %0)
|
|
|
|
ret i32 %2
|
|
|
|
}
declare void @llvm.va_start(i8*)
declare void @llvm.va_end(i8*)