2017-01-05 08:20:51 +08:00
// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32
// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
2014-11-01 06:00:51 +08:00
// Baseline case: two plain int arguments. The body is deliberately empty;
// only the emitted function signature matters to this test.
void __vectorcall v1(int a, int b) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
// Same shape as v1, but with two char arguments instead of ints.
void __vectorcall v2(char a, char b) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
2016-05-03 01:41:07 +08:00
// Aggregate holding a single int.
struct Small {
  int x;
};
2014-11-01 06:00:51 +08:00
// A single-int struct passed by value between two int arguments.
void __vectorcall v3(int a, struct Small b, int c) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
// Aggregate wrapping an array of five ints.
struct Large {
  int a[5];
};
// A five-int struct passed by value between two int arguments.
void __vectorcall v4(int a, struct Large b, int c) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
// Aggregate of two doubles.
struct HFA2 {
  double x;
  double y;
};
// Aggregate of four doubles.
struct HFA4 {
  double w;
  double x;
  double y;
  double z;
};
// Aggregate of five doubles.
struct HFA5 {
  double v;
  double w;
  double x;
  double y;
  double z;
};
// A four-double aggregate passed by value between two int arguments.
void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
2014-11-01 06:00:51 +08:00
// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers.
// Two four-double aggregates followed by a scalar double.
void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
// X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
2014-11-01 06:00:51 +08:00
// Ensure that we pass builtin types directly while counting them against the
// SSE register usage.
// Five scalar doubles followed by a two-double aggregate.
void __vectorcall hfa3(double a, double b, double c, double d, double e,
                       struct HFA2 f) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do an alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
2014-11-01 06:00:51 +08:00
// Aggregates with more than four elements are not HFAs and are passed byval.
// Because they are not classified as homogeneous, they don't get special
// handling to ensure alignment.
// A single five-double aggregate passed by value.
void __vectorcall hfa4(struct HFA5 a) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
// Return HFAs of 4 or fewer elements in registers.
// File-scope object whose value hfa5 hands back to its caller.
static struct HFA2 g_hfa2;

// Returns a two-double aggregate by value.
struct HFA2 __vectorcall hfa5(void) {
  return g_hfa2;
}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
2014-11-01 06:00:51 +08:00
// X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
// 16-byte vector of floats (four lanes).
typedef float v4f32 __attribute__((vector_size(16)));
// Aggregate of two v4f32 vectors.
struct HVA2 {
  v4f32 x;
  v4f32 y;
};
2017-01-05 08:20:51 +08:00
// Aggregate of three v4f32 vectors.
struct HVA3 {
  v4f32 w;
  v4f32 x;
  v4f32 y;
};
2014-11-01 06:00:51 +08:00
// Aggregate of four v4f32 vectors.
struct HVA4 {
  v4f32 w;
  v4f32 x;
  v4f32 y;
  v4f32 z;
};
2017-01-05 08:20:51 +08:00
// Aggregate of five v4f32 vectors.
struct HVA5 {
  v4f32 w;
  v4f32 x;
  v4f32 y;
  v4f32 z;
  v4f32 p;
};
2014-11-01 06:00:51 +08:00
2017-01-05 08:20:51 +08:00
// A four-vector aggregate passed between two ints; returns its first member.
v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {
  return b.w;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
// X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
2014-11-01 06:00:51 +08:00
2017-01-05 08:20:51 +08:00
// Two four-vector aggregates followed by a plain vector, which is returned.
v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {
  return c;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
// X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
2014-11-01 06:00:51 +08:00
2017-01-05 08:20:51 +08:00
// Five plain vectors followed by a two-vector aggregate; returns the
// aggregate's first member.
v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e,
                        struct HVA2 f) {
  return f.x;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
// X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
// Vector types have higher priority than HVA structures, so vector types are
// allocated first and HVAs are allocated if enough registers are available.
// A four-vector aggregate, a two-vector aggregate, then a plain vector;
// returns the second member of the two-vector aggregate.
v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {
  return b.y;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
// X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
// Two three-vector aggregates, a plain vector, then a two-vector aggregate;
// returns the second member of the trailing aggregate.
v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c,
                        struct HVA2 d) {
  return d.y;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
// X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
// Takes two four-vector aggregates and returns the second one by value.
struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) {
  return b;
}
// X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
// X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
// Returns an empty-initialized five-vector aggregate by value.
struct HVA5 __vectorcall hva7() {
  struct HVA5 a = {};
  return a;
}
// X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
// X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
// Four plain vectors, an int, then a fifth vector, which is returned.
v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {
  return f;
}
// X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
// X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
2014-11-01 06:00:51 +08:00
// Three-lane float vector declared with the ext_vector_type attribute.
typedef float v3f32 __attribute__((ext_vector_type(3)));
// Aggregate of two three-lane float vectors.
struct OddSizeHVA {
  v3f32 x;
  v3f32 y;
};
// A single aggregate of two three-lane vectors passed by value.
void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
2017-01-05 08:20:51 +08:00
// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
2017-06-22 00:37:22 +08:00
// The Vectorcall ABI only allows passing the first 6 items in registers in x64, so this shouldn't
2017-01-05 08:20:51 +08:00
// consider 'p7' as a register. Instead p5 gets put into the register on the second pass.
2017-06-22 00:37:22 +08:00
// x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
// Eight mixed arguments (double aggregates, floats and ints); returns the
// first argument unchanged.
struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2,
                                      struct HFA4 p3, int p4,
                                      struct HFA2 p5, float p6, float p7,
                                      int p8) {
  return p1;
}
// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
// Vectorcall in both architectures allows passing of an HVA as long as there is room,
// even if it is not one of the first 6 arguments. First pass puts p4 into a
// register on both. p9 ends up in a register in x86 only. Second pass puts p1
// in a register, does NOT put p7 in a register (since there's no room), then puts
// p8 in a register.
// Nine mixed arguments with double aggregates at positions 1, 7 and 8 and
// floats at positions 4 and 9. The body is deliberately empty.
void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4,
                              int p5, int p6, struct HFA4 p7,
                              struct HFA2 p8, float p9) {}
// X32: define x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
// X64: define x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)