AArch64: look up EmitAArch64Scalar support before calling.

This fixes one immediate bug where an expression with side-effects
could be emitted twice during a NEON call.

It also prepares the way for folding CodeGen for many of the SISD
intrinsics into a table, reducing code size and hopefully increasing
performance eventually ("binary search + few switch cases" should be
better than "lots of switch cases").

llvm-svn: 201667
This commit is contained in:
Tim Northover 2014-02-19 11:55:06 +00:00
parent cd8535a96d
commit db3e5e2408
3 changed files with 469 additions and 668 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
// RUN: -emit-llvm -O0 -o - %s | FileCheck %s
#include <arm_neon.h>
void *foo(void);
float32x2_t bar(void) {
// CHECK-LABEL: @bar
// foo() is an opaque call with potential side effects. Passing its result
// straight into a NEON load intrinsic previously caused the argument to be
// emitted twice (regression test for the double-emission bug fixed in this
// commit). The -NOT line below pins "exactly one call to foo".
return vld1_f32(foo());
// CHECK: call i8* @foo
// CHECK-NOT: call i8* @foo
// CHECK: call <2 x float> @llvm.arm.neon.vld1
}

View File

@@ -3165,10 +3165,10 @@ NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS) {
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
StringMap<OpKind> EmittedMap;
// Generate BuiltinsNEON.
OS << "#ifdef GET_NEON_BUILTINS\n";
// We want to emit the intrinsics in alphabetical order, so use the more
// expensive std::map to gather them together first.
std::map<std::string, OpKind> EmittedMap;
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
@@ -3203,9 +3203,17 @@ void NeonEmitter::genBuiltinsDef(raw_ostream &OS) {
continue;
EmittedMap[bd] = OpNone;
OS << bd << "\n";
}
}
// Generate BuiltinsNEON.
OS << "#ifdef GET_NEON_BUILTINS\n";
for (std::map<std::string, OpKind>::iterator I = EmittedMap.begin(),
E = EmittedMap.end();
I != E; ++I)
OS << I->first << "\n";
OS << "#endif\n\n";
}