Daniel Dunbar
b8cba97cde
There is no reason for this test to invoke 'llc'.
...
llvm-svn: 109847
2010-07-30 03:30:55 +00:00
Chris Lattner
7f4b81af7a
fix rdar://8251384, another case where we could access beyond the
...
end of a struct. This improves the case where the struct being passed
contains 3 floats, whether as three scalar fields or as an array of
three. Before, we'd generate this IR for the testcase:
define float @bar(double %X.coerce0, double %X.coerce1) nounwind {
entry:
%X = alloca %struct.foof, align 8 ; <%struct.foof*> [#uses=2]
%0 = bitcast %struct.foof* %X to %1* ; <%1*> [#uses=2]
%1 = getelementptr %1* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %X.coerce0, double* %1
%2 = getelementptr %1* %0, i32 0, i32 1 ; <double*> [#uses=1]
store double %X.coerce1, double* %2
%tmp = getelementptr inbounds %struct.foof* %X, i32 0, i32 2 ; <float*> [#uses=1]
%tmp1 = load float* %tmp ; <float> [#uses=1]
ret float %tmp1
}
which compiled (with optimization) to:
_bar: ## @bar
## BB#0: ## %entry
movd %xmm1, %rax
movd %eax, %xmm0
ret
Now we produce:
define float @bar(double %X.coerce0, float %X.coerce1) nounwind {
entry:
%X = alloca %struct.foof, align 8 ; <%struct.foof*> [#uses=2]
%0 = bitcast %struct.foof* %X to %0* ; <%0*> [#uses=2]
%1 = getelementptr %0* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %X.coerce0, double* %1
%2 = getelementptr %0* %0, i32 0, i32 1 ; <float*> [#uses=1]
store float %X.coerce1, float* %2
%tmp = getelementptr inbounds %struct.foof* %X, i32 0, i32 2 ; <float*> [#uses=1]
%tmp1 = load float* %tmp ; <float> [#uses=1]
ret float %tmp1
}
and:
_bar: ## @bar
## BB#0: ## %entry
movaps %xmm1, %xmm0
ret
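For reference, the testcase would have the shape of (a hedged C
reconstruction matching the IR above, not necessarily the committed test):

struct foof { float x, y, z; };
float bar(struct foof X) {
  /* X.z lives in the second eight-byte; it is now passed as a float
     instead of being widened to a double that reads past the struct */
  return X.z;
}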
llvm-svn: 109776
2010-07-29 18:13:09 +00:00
Chris Lattner
3f76342cfc
handle a case where we could access off the end of a function
...
that Eli pointed out, rdar://8249586
llvm-svn: 109762
2010-07-29 17:34:39 +00:00
Chris Lattner
44f9c3b3f1
in release mode, IRBuilder doesn't add names to instructions;
...
this will hopefully fix the osuosl clang-i686-darwin10 builder.
llvm-svn: 109760
2010-07-29 17:14:05 +00:00
Chris Lattner
98076a25ce
This goes a little bit far, but optimize cases like:
...
struct a {
  struct c {
    double x;
    int y;
  } x[1];
};
void foo(struct a A) {
}
into:
define void @foo(double %A.coerce0, i32 %A.coerce1) nounwind {
entry:
%A = alloca %struct.a, align 8 ; <%struct.a*> [#uses=1]
%0 = bitcast %struct.a* %A to %struct.c* ; <%struct.c*> [#uses=2]
%1 = getelementptr %struct.c* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %A.coerce0, double* %1
%2 = getelementptr %struct.c* %0, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 %A.coerce1, i32* %2
instead of:
define void @foo(double %A.coerce0, i64 %A.coerce1) nounwind {
entry:
%A = alloca %struct.a, align 8 ; <%struct.a*> [#uses=1]
%0 = bitcast %struct.a* %A to %0* ; <%0*> [#uses=2]
%1 = getelementptr %0* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %A.coerce0, double* %1
%2 = getelementptr %0* %0, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %A.coerce1, i64* %2
I only do this now because I never want to look at this code again :)
llvm-svn: 109738
2010-07-29 07:43:55 +00:00
Chris Lattner
c8b7b53a1e
implement a todo: pass an eight-byte that consists of a
...
small integer + padding as that small integer. On code
like:
struct c { double x; int y; };
void bar(struct c C) { }
This means that we compile to:
define void @bar(double %C.coerce0, i32 %C.coerce1) nounwind {
entry:
%C = alloca %struct.c, align 8 ; <%struct.c*> [#uses=2]
%0 = getelementptr %struct.c* %C, i32 0, i32 0 ; <double*> [#uses=1]
store double %C.coerce0, double* %0
%1 = getelementptr %struct.c* %C, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 %C.coerce1, i32* %1
instead of:
define void @bar(double %C.coerce0, i64 %C.coerce1) nounwind {
entry:
%C = alloca %struct.c, align 8 ; <%struct.c*> [#uses=3]
%0 = bitcast %struct.c* %C to %0* ; <%0*> [#uses=2]
%1 = getelementptr %0* %0, i32 0, i32 0 ; <double*> [#uses=1]
store double %C.coerce0, double* %1
%2 = getelementptr %0* %0, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %C.coerce1, i64* %2
which gives SRoA heartburn.
This implements rdar://5711709, a nice low number :)
llvm-svn: 109737
2010-07-29 07:30:00 +00:00
Chris Lattner
fe34c1d53e
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
...
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 ABI lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 06:26:06 +00:00
Chris Lattner
9fa15c3608
ignore structs that wrap vectors in IR; the abstraction shouldn't add a penalty.
...
Before we'd compile the example into something like:
%coerce.dive2 = getelementptr %struct.v4f32wrapper* %retval, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
%1 = bitcast <4 x float>* %coerce.dive2 to <2 x double>* ; <<2 x double>*> [#uses=1]
%2 = load <2 x double>* %1, align 1 ; <<2 x double>> [#uses=1]
ret <2 x double> %2
Now we produce:
%coerce.dive2 = getelementptr %struct.v4f32wrapper* %retval, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
%0 = load <4 x float>* %coerce.dive2, align 1 ; <<4 x float>> [#uses=1]
ret <4 x float> %0
llvm-svn: 109732
2010-07-29 05:02:29 +00:00
Chris Lattner
4200fe4e50
move the 'pretty 16-byte vector' inferring code up to be shared
...
with return values, improving stuff that returns __m128 etc.
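A hedged illustration of the kind of code that benefits (assuming SSE
intrinsics):

#include <xmmintrin.h>
__m128 add4(__m128 a, __m128 b) {
  /* the return value is now inferred as <4 x float> rather than
     being coerced through <2 x double> */
  return _mm_add_ps(a, b);
}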
llvm-svn: 109731
2010-07-29 04:56:46 +00:00
Chris Lattner
3a44c7e55d
now that we have CGT around, we can start using preferred types
...
for return values too. Instead of compiling something like:
struct foo {
int *X;
float *Y;
};
struct foo test(struct foo *P) { return *P; }
to:
%1 = type { i64, i64 }
define %1 @test(%struct.foo* %P) nounwind {
entry:
%retval = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
%P.addr = alloca %struct.foo*, align 8 ; <%struct.foo**> [#uses=2]
store %struct.foo* %P, %struct.foo** %P.addr
%tmp = load %struct.foo** %P.addr ; <%struct.foo*> [#uses=1]
%tmp1 = bitcast %struct.foo* %retval to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.foo* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 16, i32 8, i1 false)
%0 = bitcast %struct.foo* %retval to %1* ; <%1*> [#uses=1]
%1 = load %1* %0, align 1 ; <%1> [#uses=1]
ret %1 %1
}
We now get a more type-safe result:
define %struct.foo @test(%struct.foo* %P) nounwind {
entry:
%retval = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
%P.addr = alloca %struct.foo*, align 8 ; <%struct.foo**> [#uses=2]
store %struct.foo* %P, %struct.foo** %P.addr
%tmp = load %struct.foo** %P.addr ; <%struct.foo*> [#uses=1]
%tmp1 = bitcast %struct.foo* %retval to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.foo* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 16, i32 8, i1 false)
%0 = load %struct.foo* %retval ; <%struct.foo> [#uses=1]
ret %struct.foo %0
}
That memcpy is completely terrible, but I don't know how to fix it.
llvm-svn: 109729
2010-07-29 04:46:19 +00:00
Chris Lattner
f4ba08aeaf
pass argument vectors in a type that corresponds to the user type if
...
possible. This improves the example to pass <4 x float> instead of
<2 x double>, but we still get awful code and still don't get the
return value right.
llvm-svn: 109700
2010-07-28 23:47:21 +00:00
Chris Lattner
31faff5d58
use Get8ByteTypeAtOffset for the return value path as well so we
...
don't get errors similar to PR7714 on the return path.
llvm-svn: 109689
2010-07-28 23:06:14 +00:00
Chris Lattner
4c1e484f39
fix PR7714 by not referencing off the end of a struct when passed by value in
...
the x86-64 ABI. This also improves codegen. Some refactoring of this
code is needed.
llvm-svn: 109681
2010-07-28 22:15:08 +00:00
Fariborz Jahanian
d5010898ab
Fix flags in global block descriptor when
...
block returns structs. Fixes radar 8241648.
Executable test added to the LLVM test suite.
llvm-svn: 109620
2010-07-28 19:07:18 +00:00
Fariborz Jahanian
0ebca28f1d
2nd argument of __builtin_expect must be evaluated
...
if it has side effects, to match gcc's behaviour.
Addresses radar 8172109.
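A hedged example of the behaviour in question:

int count = 0;
int likely_nonzero(int x) {
  /* count++ has a side effect, so it must still be evaluated even
     though only its value is used as the expected result */
  return __builtin_expect(x, count++);
}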
llvm-svn: 109467
2010-07-26 23:11:03 +00:00
John McCall
a464ff9d15
Switch some random local-decl cleanups over to using lazy cleanups. Turn on
...
the block-release unwind cleanup: we're never going to test it if we don't turn
it on.
llvm-svn: 108992
2010-07-21 06:13:08 +00:00
Chandler Carruth
3973af797a
Fix a goof in my previous patch -- not all of the builtins return a value, some
...
have fixed return types.
llvm-svn: 108657
2010-07-18 20:54:12 +00:00
Chandler Carruth
bc8cab16c5
Improve the representation of the atomic builtins in a few ways. First, we make
...
their call expressions synthetically have the "deduced" types based on their
first argument. We only insert conversions in the AST for arguments whose
values require conversion to match the value type expected. This keeps PR7600
closed by maintaining the return type, but avoids assertions due to unexpected
implicit casts making the type unsigned (test case added from Daniel).
The magic is moved into the codegen for the atomic builtin which inserts the
casts as needed at the IR level to raise the type to an integer suitable for
the LLVM intrinsic. This shouldn't cause any real change in functionality, but
now we can make the builtin be more truly polymorphic.
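A hedged illustration with one of the __sync builtins:

unsigned short bump(unsigned short *p) {
  /* the builtin's type is deduced from the first argument, so the
     result stays unsigned short; codegen inserts the casts needed to
     reach the integer type the LLVM intrinsic expects */
  return __sync_fetch_and_add(p, 1);
}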
llvm-svn: 108638
2010-07-18 07:23:17 +00:00
Eli Friedman
eca55afea3
Fix for PR3800: make sure not to evaluate the expression for a read-write
...
asm operand twice.
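A hedged sketch of the read-write ("+") operand case:

void touch_and_advance(int *p) {
  /* *p++ is both input and output; it must be evaluated once, not
     once for each role */
  __asm__ volatile("" : "+r"(*p++));
}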
llvm-svn: 108489
2010-07-16 00:55:21 +00:00
Daniel Dunbar
999daa57c7
Builtins/ARM: __clear_cache doesn't seem to have a consistent prototype, declare
...
the builtin as void __clear_cache(...) to work around this, which appears to
match what GCC does.
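A hedged usage sketch (assuming an ARM target where the builtin is
recognized):

void flush(char *begin, char *end) {
  /* prototypes for __clear_cache differ between headers and targets;
     the (...) declaration lets calls like this type-check either way */
  __clear_cache(begin, end);
}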
llvm-svn: 108487
2010-07-16 00:31:23 +00:00
Daniel Dunbar
3348e2d175
IRgen: Support user defined attributes on block runtime functions.
...
- The issue here is that /usr/include/Blocks.h wants to define some of the
block runtime globals as weak, depending on the target. This doesn't work in
Clang because we aren't using the AST decl for these globals.
- The fix is a pretty gross hack which just watches all the decls for the
specific blocks globals we need to know about; if we see one we use it,
otherwise we use the hand coded type.
In time, I would like to clean this up by changing IRgen to ask Sema/AST for
the decl, which would then be lazily loaded from the builtin table if
necessary. This could be used in a whole host of places in IRgen and would
get rid of a lot of grotty hand coding of LLVM IR; however, we need some
extra Sema support for this as well as support for builtin global variables.
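For context, the sort of declaration involved looks roughly like this (a
hedged sketch, not Blocks.h verbatim):

/* a block runtime global that a system header may want to mark weak */
extern void * _NSConcreteGlobalBlock[32] __attribute__((weak_import));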
llvm-svn: 108482
2010-07-16 00:00:19 +00:00
Douglas Gregor
c5dded5f99
Improve test case. Thanks Eli
...
llvm-svn: 108470
2010-07-15 23:04:05 +00:00
Douglas Gregor
8997690ff1
Don't suppress the emission of available_externally functions marked
...
with always_inline attribute. Thanks to Howard for the tip.
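A hedged C99 illustration:

/* a C99 inline definition gets available_externally linkage; with
   always_inline, its body must still be emitted so calls to it can
   be inlined */
inline __attribute__((always_inline)) int sq(int x) { return x * x; }
int use_sq(int x) { return sq(x); }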
llvm-svn: 108469
2010-07-15 22:58:18 +00:00
Douglas Gregor
603d81bf8d
When forming a function call or message send expression, be sure to
...
strip cv-qualifiers from the expression's type when the language calls
for it: in C, that's all the time, while C++ only does it for
non-class types.
Centralized the computation of the call expression type in
QualType::getCallResultType() and some helper functions in other nodes
(FunctionDecl, ObjCMethodDecl, FunctionType), and updated all relevant
callers of getResultType() to getCallResultType().
Fixes PR7598 and PR7463, along with a bunch of getResultType() call
sites that weren't stripping references off the result type (nothing
stripped cv-qualifiers properly before this change).
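A hedged C illustration:

const int f(void);
void g(void) {
  /* the call expression f() has type int, not const int: C strips
     cv-qualifiers unconditionally, C++ only for non-class types */
  int x = f();
  (void)x;
}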
llvm-svn: 108234
2010-07-13 08:18:22 +00:00
Douglas Gregor
a700f68828
Reinstate the optimization suppressing available_externally functions
...
at -O0. The only change from the previous patch is that we don't try
to generate virtual method thunks for an available_externally
function.
llvm-svn: 108230
2010-07-13 06:02:28 +00:00
Douglas Gregor
553f3a9b30
Speculatively revert r108156; it appears to be breaking self-host.
...
llvm-svn: 108194
2010-07-12 21:08:32 +00:00
Douglas Gregor
dbb2806a7b
Do not generate LLVM IR for available_externally function bodies at
...
-O0, since we won't be using the definitions for anything anyway. For
lib/System/Path.o when built in Debug+Asserts mode, this leads to a 4%
improvement in compile time (and suppresses 440 function bodies).
<rdar://problem/7987644>
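A hedged example of a body this suppresses:

/* C99: an inline definition without extern is available_externally;
   at -O0 no inlining happens, so its IR body need not be emitted */
inline int twice(int x) { return x + x; }
int call_twice(int x) { return twice(x); }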
llvm-svn: 108156
2010-07-12 17:24:55 +00:00
Chris Lattner
33919e7450
fix PR7280 by making the warning on code like this:
...
int test1() {
return;
}
default to an error.
llvm-svn: 108108
2010-07-11 23:34:02 +00:00
Chris Lattner
06801d7371
allow this to pass on 32-bit hosts.
...
llvm-svn: 107845
2010-07-08 00:23:21 +00:00
Chris Lattner
cb7696cf35
fix the clang side of PR7437: EmitAggregateCopy
...
was not producing a memcpy with the right address
spaces because two places in it cast the arguments
to i8*, one of which didn't preserve the address
space.
There is also an optimizer bug here.
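A hedged sketch of the pattern (assuming the address_space attribute):

typedef struct { int v[4]; } S;
/* the aggregate copy must become a memcpy whose destination pointer
   keeps address space 1 */
void copy(__attribute__((address_space(1))) S *dst, S *src) {
  *dst = *src;
}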
llvm-svn: 107842
2010-07-08 00:07:45 +00:00
Chris Lattner
26b1a19842
filecheckize this test.
...
llvm-svn: 107841
2010-07-08 00:05:45 +00:00
John McCall
11086fcb65
Don't consider casted non-global pointers to be evaluatable.
...
Fixes rdar://problem/8154689
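A hedged sketch of the pattern being rejected:

void f(void) {
  char local[16];
  /* a cast of a non-global pointer must not be treated as a
     constant-evaluatable value; it is computed at run time */
  long addr = (long)local;
  (void)addr;
}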
llvm-svn: 107755
2010-07-07 05:08:32 +00:00
Chris Lattner
c401de9998
in the "coerce" case, the ABI handling code ends up making the
...
alloca for an argument. Make sure the argument gets the proper
decl alignment, which may be different from the type alignment.
This fixes PR7567
llvm-svn: 107627
2010-07-05 20:21:00 +00:00
Chris Lattner
53b479ff6a
fix PR7564, a case where the bitfield struct init code
...
wasn't handling array padding elements right.
llvm-svn: 107621
2010-07-05 18:03:30 +00:00
Chris Lattner
0e7929f30c
fix rdar://8147692 - yet another crash due to my abi work.
...
llvm-svn: 107387
2010-07-01 06:20:47 +00:00
Daniel Dunbar
bb7ac52e02
Driver/IRgen: Add support for -momit-leaf-frame-pointer.
...
llvm-svn: 107367
2010-07-01 01:31:45 +00:00
Chris Lattner
5c740f1523
Reapply:
...
r107173, "fix PR7519: after thrashing around and remembering how all this stuff"
r107216, "fix PR7523, which was caused by the ABI code calling ConvertType instead"
This includes a fix to make ConvertTypeForMem handle the "recursive" case, and call
it as such when lowering function types which have an indirect result.
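A hedged sketch of the kind of recursion involved:

struct Big;
typedef struct Big (*Getter)(void);  /* returns Big indirectly (sret) */
struct Big { Getter get; int pad[8]; };
/* lowering Getter's function type happens while struct Big itself is
   in the midst of being converted */
void use_big(struct Big b) { (void)b; }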
llvm-svn: 107310
2010-06-30 19:14:05 +00:00
Daniel Dunbar
e422266926
Revert r107173, "fix PR7519: after thrashing around and remembering how all this stuff", it broke bootstrap.
...
llvm-svn: 107232
2010-06-30 00:22:35 +00:00
Daniel Dunbar
c85ea8e175
IRgen: Assignment to Objective-C properties shouldn't reload the value (which
...
would trigger an extra method call).
- While in the area, I also changed Clang to not emit an unnecessary load from
'x' in cases like 'y = (x = 1)'.
llvm-svn: 107210
2010-06-29 22:00:45 +00:00
Daniel Dunbar
99e13101b2
tests: Fix test to not depend on instruction names.
...
llvm-svn: 107186
2010-06-29 18:34:40 +00:00
Chris Lattner
ab1e65e2ea
fix PR7519: after thrashing around and remembering how all this stuff
...
works, the fix is quite simple: just make sure to call ConvertTypeRecursive
when the function type being lowered is in the midst of ConvertType.
llvm-svn: 107173
2010-06-29 17:56:33 +00:00
Chris Lattner
22a931e3bb
Change X86_64ABIInfo to have ASTContext and TargetData ivars to
...
avoid passing ASTContext down through all the methods it has.
When classifying an argument, or argument piece, as INTEGER, check
to see if we have a pointer at exactly the same offset in the
preferred type. If so, use that pointer type instead of i64. This
allows us to compile a function taking a StringRef into something
like this:
define i8* @foo(i64 %D.coerce0, i8* %D.coerce1) nounwind ssp {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%0 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %D.coerce0, i64* %0
%1 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
store i8* %D.coerce1, i8** %1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
instead of this:
define i8* @foo(i64 %D.coerce0, i64 %D.coerce1) nounwind ssp {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%0 = insertvalue %0 undef, i64 %D.coerce0, 0 ; <%0> [#uses=1]
%1 = insertvalue %0 %0, i64 %D.coerce1, 1 ; <%0> [#uses=1]
%2 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %1, %0* %2, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
This implements rdar://7375902 - [codegen quality] clang x86-64 ABI lowering code punishing StringRef
llvm-svn: 107123
2010-06-29 06:01:59 +00:00
Chris Lattner
9e748e9d6e
add IR names to coerced arguments.
...
llvm-svn: 107105
2010-06-29 00:14:52 +00:00
Chris Lattner
3dd716c3c3
Change CGCall to handle the "coerce" case where the coerce-to type
...
is an FCA (first-class aggregate) by passing each of its elements as
individual scalars. This produces code that fast isel is less likely
to reject and that is easier on the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still to come.
llvm-svn: 107099
2010-06-28 23:44:11 +00:00
Chris Lattner
a7d81ab7f3
X86-64:
...
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for the ABI cleaner. Passing in the low part
of a double is the same as passing in a float.
For example, we now compile:
struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = load float* %coerce.dive, align 1 ; <float> [#uses=1]
%call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0]
ret void
}
instead of:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca double ; <double*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1]
%1 = load float* %coerce.dive ; <float> [#uses=1]
store float %1, float* %0, align 1
%2 = load double* %tmp3 ; <double> [#uses=1]
%call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
ret void
}
which is this machine code (at -O0):
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
vs this:
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
movss %xmm0, (%rsp)
movsd (%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
At -O3, it is the difference between this now:
__Z3barP9DeclGroup:
movss (%rdi), %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
vs this before:
__Z3barP9DeclGroup:
movl (%rdi), %eax
movd %rax, %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
llvm-svn: 107048
2010-06-28 19:56:59 +00:00
Fariborz Jahanian
36ad0e99d5
Have __func__ and siblings point to the block's implementation function
...
name. Fixes radar 7860965.
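A hedged example (blocks extension, compiled with -fblocks):

#include <stdio.h>
void demo(void) {
  /* __func__ here names the block's implementation function, not demo */
  void (^b)(void) = ^{ printf("%s\n", __func__); };
  b();
}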
llvm-svn: 107044
2010-06-28 18:58:34 +00:00
Chris Lattner
d250b8e9a8
tweak test to pass on windows
...
llvm-svn: 107040
2010-06-28 18:29:14 +00:00
Chris Lattner
c1028f689e
Fix UnitTests/2004-02-02-NegativeZero.c, which regressed when
...
I broke negate of FP values.
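A hedged illustration of the subtlety:

double negate(double x) {
  /* -x must be a true sign flip; lowering it as 0.0 - x would turn
     -(+0.0) into +0.0 instead of -0.0 */
  return -x;
}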
llvm-svn: 107019
2010-06-28 17:12:37 +00:00
Chris Lattner
055097f024
If coercing something from int or pointer type to int or pointer type
...
(potentially after unwrapping it from a struct), do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 06:26:04 +00:00
Chris Lattner
895c52ba8b
Same patch as the previous one, but on the store side. Before, we compiled this:
...
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate just this blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fast isel bailing out and is groundwork for a future patch.
This reduces bailouts on CGStmt.ll from 935 to 911.
llvm-svn: 106974
2010-06-27 06:04:18 +00:00