//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}
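
/// Emit the conversions required to turn the given integer back into a
/// value of the original type; pointer results are recreated with inttoptr.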
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}
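
/// Emit a scalar store marked nontemporal. The builtin being emitted has no
/// result, so nullptr is returned.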
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}
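
/// Emit a scalar load marked nontemporal.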
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
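
/// Utility to insert an atomic instruction via MakeBinaryAtomicValue and
/// wrap the result as an RValue.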
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent,
                                                llvm::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // The higher-order double comes first, and so we need to truncate the
    // pair to extract the overall sign. The order of the pair is the same
    // in both little- and big-Endian modes.
    Width >>= 1;
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}
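
/// Emit an ordinary call to a library function (e.g. "fabsf") for the given
/// call expression.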
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
                      ReturnValueSlot(), Fn);
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}
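
/// Emit a call to llvm.va_start or llvm.va_end on the given va_list pointer,
/// bitcasting it to i8* first if necessary.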
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break; // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    return RValue::get(
        EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                           ? EmitScalarExpr(E->getArg(0))
                           : EmitVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID != Builtin::BI__builtin_va_end));
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
                                          {DstPtr, SrcPtr}));
  }
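  // abs(x) is lowered as x >= 0 ? x : -x.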
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
        Builder.CreateICmpSGE(ArgValue,
                              llvm::Constant::getNullValue(ArgValue->getType()),
                              "abscond");
    Value *Result =
        Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Result = EmitFAbs(*this, Arg1);
    return RValue::get(Result);
  }
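  // fmod on floating-point operands is emitted as an LLVM frem instruction.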
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
        Imag->getType()->isFPOrFPVectorTy()
            ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
            : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
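  // clz is handled just like ctz above, using the llvm.ctlz intrinsic.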
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
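  // popcount maps directly to the llvm.ctpop intrinsic.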
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
        (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
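  // bswap maps directly to the llvm.bswap intrinsic.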
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
    // FIXME: Get right address space.
    llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
    return RValue::get(
        Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI}));
  }
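  // __builtin_prefetch forwards to llvm.prefetch; a missing rw argument
  // defaults to 0 (read) and a missing locality argument to 3.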
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
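  // __builtin_trap and __debugbreak map to the llvm.trap and llvm.debugtrap
  // intrinsics, respectively.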
  case Builtin::BI__builtin_trap:
    return RValue::get(EmitTrapCall(Intrinsic::trap));
  case Builtin::BI__debugbreak:
    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
                None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
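  // isnan(x) is emitted as an unordered comparison of x with itself, which
  // is true only when x is a NaN.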
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V);

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()), "isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
        Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()), "isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsNotInf =
        Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()), "isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
        Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                          "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
        Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                              "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    Value *NormalResult =
        Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                             EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
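
  // alloca allocates the requested number of bytes on the current stack
  // frame as an i8 array alloca.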
  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Address Src = EmitPointerWithAlignment(E->getArg(1));
|
2010-04-04 11:10:52 +08:00
|
|
|
Value *SizeVal = EmitScalarExpr(E->getArg(2));
|
2015-09-08 16:05:57 +08:00
|
|
|
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
|
2015-05-31 00:11:40 +08:00
|
|
|
E->getArg(0)->getExprLoc(), FD, 0);
|
2015-09-08 16:05:57 +08:00
|
|
|
EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
|
2015-05-31 00:11:40 +08:00
|
|
|
E->getArg(1)->getExprLoc(), FD, 1);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2008-07-22 08:26:45 +08:00
|
|
|
}
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2011-04-17 08:40:24 +08:00
|
|
|
case Builtin::BI__builtin___memcpy_chk: {
|
2012-09-27 18:16:10 +08:00
|
|
|
// fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
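// If either size is not a compile-time constant, or the copied size is
// known to exceed the destination size, 'break' out of the switch so the
// builtin is emitted as an ordinary call to __memcpy_chk and checked at
// run time.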
|
2011-10-11 02:28:20 +08:00
|
|
|
llvm::APSInt Size, DstSize;
|
|
|
|
if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
|
|
|
|
!E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
|
2011-04-17 08:40:24 +08:00
|
|
|
break;
|
|
|
|
if (Size.ugt(DstSize))
|
|
|
|
break;
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Address Src = EmitPointerWithAlignment(E->getArg(1));
|
2011-04-17 08:40:24 +08:00
|
|
|
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2011-04-17 08:40:24 +08:00
|
|
|
}
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2010-06-17 00:22:04 +08:00
|
|
|
case Builtin::BI__builtin_objc_memmove_collectable: {
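// Under ObjC garbage collection, moves into collectable memory have to go
// through the runtime so its write barriers are honored; hand the Address
// operands to the runtime's GC-aware memmove rather than emitting a plain
// llvm.memmove.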
|
2015-09-08 16:05:57 +08:00
|
|
|
Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
|
2010-06-16 06:44:06 +08:00
|
|
|
Value *SizeVal = EmitScalarExpr(E->getArg(2));
|
2012-09-21 08:18:27 +08:00
|
|
|
CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
|
2015-09-08 16:05:57 +08:00
|
|
|
DestAddr, SrcAddr, SizeVal);
|
|
|
|
return RValue::get(DestAddr.getPointer());
|
2010-06-16 06:44:06 +08:00
|
|
|
}
|
2011-04-17 08:40:24 +08:00
|
|
|
|
|
|
|
case Builtin::BI__builtin___memmove_chk: {
|
2012-09-27 18:16:10 +08:00
|
|
|
// fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
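// Same folding strategy as __builtin_memcpy_chk above, but lowering to
// llvm.memmove when the fold succeeds.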
|
2011-10-11 02:28:20 +08:00
|
|
|
llvm::APSInt Size, DstSize;
|
|
|
|
if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
|
|
|
|
!E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
|
2011-04-17 08:40:24 +08:00
|
|
|
break;
|
|
|
|
if (Size.ugt(DstSize))
|
|
|
|
break;
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Address Src = EmitPointerWithAlignment(E->getArg(1));
|
2011-04-17 08:40:24 +08:00
|
|
|
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemMove(Dest, Src, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2011-04-17 08:40:24 +08:00
|
|
|
}
|
|
|
|
|
2009-12-17 08:14:28 +08:00
|
|
|
case Builtin::BImemmove:
|
2008-07-22 08:26:45 +08:00
|
|
|
case Builtin::BI__builtin_memmove: {
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Address Src = EmitPointerWithAlignment(E->getArg(1));
|
2010-04-04 11:10:52 +08:00
|
|
|
Value *SizeVal = EmitScalarExpr(E->getArg(2));
|
2015-09-08 16:05:57 +08:00
|
|
|
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
|
2015-05-31 00:11:40 +08:00
|
|
|
E->getArg(0)->getExprLoc(), FD, 0);
|
2015-09-08 16:05:57 +08:00
|
|
|
EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
|
2015-05-31 00:11:40 +08:00
|
|
|
E->getArg(1)->getExprLoc(), FD, 1);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemMove(Dest, Src, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2008-07-22 08:26:45 +08:00
|
|
|
}
|
2009-12-17 08:14:28 +08:00
|
|
|
case Builtin::BImemset:
|
2008-07-22 08:26:45 +08:00
|
|
|
case Builtin::BI__builtin_memset: {
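// memset takes its fill value as an 'int', so truncate it to i8 before
// emitting llvm.memset on the destination Address.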
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
2010-12-30 08:13:21 +08:00
|
|
|
Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
|
|
|
|
Builder.getInt8Ty());
|
2010-04-04 11:10:52 +08:00
|
|
|
Value *SizeVal = EmitScalarExpr(E->getArg(2));
|
2015-09-08 16:05:57 +08:00
|
|
|
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
|
2015-05-31 00:11:40 +08:00
|
|
|
E->getArg(0)->getExprLoc(), FD, 0);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2008-05-20 07:27:48 +08:00
|
|
|
}
|
2011-04-17 08:40:24 +08:00
|
|
|
case Builtin::BI__builtin___memset_chk: {
|
2012-09-27 18:16:10 +08:00
|
|
|
// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
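// Same folding strategy as __builtin_memcpy_chk above, but for memset.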
|
2011-10-11 02:28:20 +08:00
|
|
|
llvm::APSInt Size, DstSize;
|
|
|
|
if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
|
|
|
|
!E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
|
2011-04-17 08:40:24 +08:00
|
|
|
break;
|
|
|
|
if (Size.ugt(DstSize))
|
|
|
|
break;
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Dest = EmitPointerWithAlignment(E->getArg(0));
|
2011-04-17 08:40:24 +08:00
|
|
|
Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
|
|
|
|
Builder.getInt8Ty());
|
|
|
|
Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
|
|
|
|
return RValue::get(Dest.getPointer());
|
2011-04-17 08:40:24 +08:00
|
|
|
}
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend? Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
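  // Illustrative example for __builtin_dwarf_cfa (not from the original
  // source): a call such as
  //   void *cfa = __builtin_dwarf_cfa();
  // is expected to lower to roughly
  //   %cfa = call i8* @llvm.eh.dwarf.cfa(i32 0)
  // with the hard-coded zero offset discussed above.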
  case Builtin::BI__builtin_return_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
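  // Illustrative example for __builtin_return_address (editorial, for
  // exposition only): with a constant depth, __builtin_return_address(0)
  // emits roughly
  //   %ra = call i8* @llvm.returnaddress(i32 0)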
  case Builtin::BI__builtin_frame_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                    ? Intrinsic::eh_return_i32
                                    : Intrinsic::eh_return_i64);
    Builder.CreateCall(F, {Int, Ptr});
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms. Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
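  // Illustrative example for __builtin_extend_pointer (not from the original
  // source): on a 32-bit target that zero-extends pointers,
  // __builtin_extend_pointer(p) becomes roughly
  //   %cast = ptrtoint i8* %p to i32
  //   %ext  = zext i32 %cast to i64
  // with sext instead of zext when the target hook requests sign extension.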
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
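  // Editorial note on __builtin_setjmp (hedged, based on the stores above and
  // the usual description of llvm.eh.sjlj.setjmp): the buffer is treated as a
  // small array of pointer-sized words where word 0 holds the frame address,
  // word 2 holds the saved stack pointer, and the intrinsic itself fills in
  // the resume address. The LLVM language reference is authoritative here.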
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
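  // Illustrative example (not from the original source): EmitBinaryAtomic
  // lowers these to a sequentially-consistent atomicrmw, so
  //   int old = __sync_fetch_and_add(&x, 5);
  // becomes roughly
  //   %old = atomicrmw add i32* @x, i32 5 seq_cst
  // and the old value is returned unchanged.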
  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
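  // Editorial note (hedged, inferred from the call sites above): the
  // *_and_fetch forms reuse the same atomicrmw and then recompute the new
  // value from the returned old value; for nand the trailing 'true' flag asks
  // EmitBinaryAtomicPost to invert the And, matching GCC's
  // __sync_nand_and_fetch result of ~(old & value).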
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
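  // Illustrative example (an assumption about MakeAtomicCmpXchgValue, for
  // exposition only): both forms emit a sequentially-consistent cmpxchg such
  // as
  //   %pair = cmpxchg i32* %p, i32 %expected, i32 %desired seq_cst seq_cst
  // with the val_ form extracting the old value and the bool_ form
  // extracting the success bit from the result pair.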
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
                                 StoreSize);
    Store->setAtomic(llvm::Release);
    return RValue::get(nullptr);
  }
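  // Illustrative example (not from the original source): for an int lock,
  //   __sync_lock_release(&l);
  // emits roughly
  //   store atomic i32 0, i32* %l release, align 4
  // i.e. a plain release store of zero rather than an atomicrmw.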
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(nullptr);
  }
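  // Illustrative example: __sync_synchronize() therefore emits just
  //   fence seq_cst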
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
                                               FunctionType::ExtInfo(),
                                               RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }
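  // Illustrative example (an assumption about the runtime ABI, for exposition
  // only): on a 64-bit target, __atomic_is_lock_free(8, p) becomes an
  // ordinary call resolved by the atomic support library rather than being
  // inlined, roughly
  //   %r = call zeroext i1 @__atomic_is_lock_free(i64 8, i8* %p)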
  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = nullptr;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Monotonic);
        break;
      case 1: // memory_order_consume
      case 2: // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Acquire);
        break;
      case 3: // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Release);
        break;
      case 4: // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::AcquireRelease);
        break;
      case 5: // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }
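    // When the ordering is not a compile-time constant, fall through to the
    // dynamic path below: switch on the runtime memory_order value
    // (0 relaxed, 1 consume, 2 acquire, 3 release, 4 acq_rel, 5 seq_cst)
    // and emit one atomicrmw per reachable ordering.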
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
      llvm::Monotonic, llvm::Acquire, llvm::Release,
      llvm::AcquireRelease, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }
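  // Illustrative example (not from the original source): with a constant
  // ordering, __atomic_test_and_set(p, __ATOMIC_SEQ_CST) is roughly
  //   %old     = atomicrmw xchg i8* %p, i8 1 seq_cst
  //   %was_set = icmp ne i8 %old, 0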
  case Builtin::BI__atomic_clear: {
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Address Ptr = EmitPointerWithAlignment(E->getArg(0));
    unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::Monotonic);
        break;
      case 3: // memory_order_release
        Store->setOrdering(llvm::Release);
        break;
      case 5: // memory_order_seq_cst
        Store->setOrdering(llvm::SequentiallyConsistent);
        break;
      }
      return RValue::get(nullptr);
    }
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
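  // Illustrative example for __atomic_clear (not from the original source):
  //   __atomic_clear(p, __ATOMIC_RELEASE);
  // is roughly
  //   store atomic i8 0, i8* %p release
  // marked volatile when the pointee type is volatile-qualified.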
  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    llvm::SynchronizationScope Scope;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      Scope = llvm::SingleThread;
    else
      Scope = llvm::CrossThread;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1: // memory_order_consume
      case 2: // memory_order_acquire
        Builder.CreateFence(llvm::Acquire, Scope);
        break;
      case 3: // memory_order_release
        Builder.CreateFence(llvm::Release, Scope);
        break;
      case 4: // memory_order_acq_rel
        Builder.CreateFence(llvm::AcquireRelease, Scope);
        break;
      case 5: // memory_order_seq_cst
        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
        break;
      }
      return RValue::get(nullptr);
    }
    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
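  // Illustrative examples for the fence builtins (not from the original
  // source):
  //   __atomic_thread_fence(__ATOMIC_ACQUIRE)  ->  fence acquire
  //   __atomic_signal_fence(__ATOMIC_SEQ_CST)  ->  fence singlethread seq_cst
  // A relaxed ordering emits no fence at all, as in the constant case above.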
  // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
    // in finite- or unsafe-math mode (the intrinsic has different semantics
    // for handling negative numbers compared to the library function, so
    // -fmath-errno=0 is not enough).
    if (!FD->hasAttr<ConstAttr>())
      break;
    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
          CGM.getCodeGenOpts().NoNaNsFPMath))
      break;
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg0->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
    return RValue::get(Builder.CreateCall(F, Arg0));
  }
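  // Illustrative example for the sqrt case (hedged): when the checks above
  // pass, sqrt(x) on a double becomes roughly
  //   %r = call double @llvm.sqrt.f64(double %x)
  // otherwise the break falls through to an ordinary library call.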
  case Builtin::BI__builtin_pow:
  case Builtin::BI__builtin_powf:
  case Builtin::BI__builtin_powl:
  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }
  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(
        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
                               EmitScalarExpr(E->getArg(2))}));
  }
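  // Illustrative example for the fma case (not from the original source):
  // fma(a, b, c) on double operands becomes roughly
  //   %r = call double @llvm.fma.f64(double %a, double %b, double %c)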
  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    return RValue::get(
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
                           ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially cast, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  }
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll: {

    // We translate all of these builtins from expressions of the form:
    //   int x = ..., y = ..., carryin = ..., carryout, result;
    //   result = __builtin_addc(x, y, carryin, &carryout);
    //
    // to LLVM IR of the form:
    //
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
    //                                                       i32 %carryin)
    //   %result = extractvalue {i32, i1} %tmp2, 0
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
    //   %tmp3 = or i1 %carry1, %carry2
    //   %tmp4 = zext i1 %tmp3 to i32
    //   store i32 %tmp4, i32* %carryout

    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
|
|
|
|
Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
|
2013-01-13 10:22:39 +08:00
|
|
|
|
2013-01-15 05:44:30 +08:00
|
|
|
// Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
|
|
|
|
llvm::Intrinsic::ID IntrinsicId;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unknown multiprecision builtin id.");
|
2013-06-19 04:40:40 +08:00
|
|
|
case Builtin::BI__builtin_addcb:
|
2013-01-15 05:44:30 +08:00
|
|
|
case Builtin::BI__builtin_addcs:
|
|
|
|
case Builtin::BI__builtin_addc:
|
|
|
|
case Builtin::BI__builtin_addcl:
|
|
|
|
case Builtin::BI__builtin_addcll:
|
|
|
|
IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
|
|
|
|
break;
|
2013-06-19 04:40:40 +08:00
|
|
|
case Builtin::BI__builtin_subcb:
|
2013-01-15 05:44:30 +08:00
|
|
|
case Builtin::BI__builtin_subcs:
|
|
|
|
case Builtin::BI__builtin_subc:
|
|
|
|
case Builtin::BI__builtin_subcl:
|
|
|
|
case Builtin::BI__builtin_subcll:
|
|
|
|
IntrinsicId = llvm::Intrinsic::usub_with_overflow;
|
|
|
|
break;
|
|
|
|
}
|
2013-01-13 10:22:39 +08:00
|
|
|
|
|
|
|
// Construct our resulting LLVM IR expression.
|
|
|
|
llvm::Value *Carry1;
|
|
|
|
llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
|
|
|
|
X, Y, Carry1);
|
|
|
|
llvm::Value *Carry2;
|
|
|
|
llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
|
|
|
|
Sum1, Carryin, Carry2);
|
|
|
|
llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
|
|
|
|
X->getType());
|
|
|
|
Builder.CreateStore(CarryOut, CarryOutPtr);
|
2013-01-13 10:22:39 +08:00
|
|
|
return RValue::get(Sum2);
|
|
|
|
}
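// Illustrative C usage (hypothetical caller, not part of this file): chaining
// the carry to add two double-word values limb by limb:
//   unsigned long long c0, c1;
//   unsigned long long lo = __builtin_addcll(a_lo, b_lo, 0, &c0);
//   unsigned long long hi = __builtin_addcll(a_hi, b_hi, c0, &c1);
// Each call maps onto the pair of @llvm.uadd.with.overflow calls shown above.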
|
2013-06-21 07:28:10 +08:00
|
|
|
case Builtin::BI__builtin_uadd_overflow:
|
|
|
|
case Builtin::BI__builtin_uaddl_overflow:
|
|
|
|
case Builtin::BI__builtin_uaddll_overflow:
|
|
|
|
case Builtin::BI__builtin_usub_overflow:
|
|
|
|
case Builtin::BI__builtin_usubl_overflow:
|
|
|
|
case Builtin::BI__builtin_usubll_overflow:
|
|
|
|
case Builtin::BI__builtin_umul_overflow:
|
|
|
|
case Builtin::BI__builtin_umull_overflow:
|
|
|
|
case Builtin::BI__builtin_umulll_overflow:
|
|
|
|
case Builtin::BI__builtin_sadd_overflow:
|
|
|
|
case Builtin::BI__builtin_saddl_overflow:
|
|
|
|
case Builtin::BI__builtin_saddll_overflow:
|
|
|
|
case Builtin::BI__builtin_ssub_overflow:
|
|
|
|
case Builtin::BI__builtin_ssubl_overflow:
|
|
|
|
case Builtin::BI__builtin_ssubll_overflow:
|
|
|
|
case Builtin::BI__builtin_smul_overflow:
|
|
|
|
case Builtin::BI__builtin_smull_overflow:
|
|
|
|
case Builtin::BI__builtin_smulll_overflow: {
|
|
|
|
|
|
|
|
// We translate all of these builtins directly to the relevant LLVM
// *.with.overflow intrinsic call.
|
|
|
|
|
|
|
|
// Scalarize our inputs.
|
|
|
|
llvm::Value *X = EmitScalarExpr(E->getArg(0));
|
|
|
|
llvm::Value *Y = EmitScalarExpr(E->getArg(1));
|
|
|
|
Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
|
2013-06-21 07:28:10 +08:00
|
|
|
|
|
|
|
// Decide which of the overflow intrinsics we are lowering to:
|
|
|
|
llvm::Intrinsic::ID IntrinsicId;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unknown security overflow builtin id.");
|
|
|
|
case Builtin::BI__builtin_uadd_overflow:
|
|
|
|
case Builtin::BI__builtin_uaddl_overflow:
|
|
|
|
case Builtin::BI__builtin_uaddll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
|
|
|
|
break;
|
|
|
|
case Builtin::BI__builtin_usub_overflow:
|
|
|
|
case Builtin::BI__builtin_usubl_overflow:
|
|
|
|
case Builtin::BI__builtin_usubll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::usub_with_overflow;
|
|
|
|
break;
|
|
|
|
case Builtin::BI__builtin_umul_overflow:
|
|
|
|
case Builtin::BI__builtin_umull_overflow:
|
|
|
|
case Builtin::BI__builtin_umulll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::umul_with_overflow;
|
|
|
|
break;
|
|
|
|
case Builtin::BI__builtin_sadd_overflow:
|
|
|
|
case Builtin::BI__builtin_saddl_overflow:
|
|
|
|
case Builtin::BI__builtin_saddll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
|
|
|
|
break;
|
|
|
|
case Builtin::BI__builtin_ssub_overflow:
|
|
|
|
case Builtin::BI__builtin_ssubl_overflow:
|
|
|
|
case Builtin::BI__builtin_ssubll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
|
|
|
|
break;
|
|
|
|
case Builtin::BI__builtin_smul_overflow:
|
|
|
|
case Builtin::BI__builtin_smull_overflow:
|
|
|
|
case Builtin::BI__builtin_smulll_overflow:
|
|
|
|
IntrinsicId = llvm::Intrinsic::smul_with_overflow;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
llvm::Value *Carry;
|
|
|
|
llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
|
|
|
|
Builder.CreateStore(Sum, SumOutPtr);
|
2013-06-21 07:28:10 +08:00
|
|
|
|
|
|
|
return RValue::get(Carry);
|
|
|
|
}
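// Illustrative C usage (hypothetical caller): the checked-arithmetic builtins
// return the overflow bit and store the (possibly wrapped) result:
//   unsigned r;
//   if (__builtin_umul_overflow(x, y, &r))
//     handle_overflow();
// This lowers to a single @llvm.umul.with.overflow.i32 call, with the result
// element stored through the pointer and the carry element returned.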
|
2013-07-11 10:27:57 +08:00
|
|
|
case Builtin::BI__builtin_addressof:
|
|
|
|
return RValue::get(EmitLValue(E->getArg(0)).getPointer());
|
2014-06-04 07:27:44 +08:00
|
|
|
case Builtin::BI__builtin_operator_new:
|
|
|
|
return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
|
|
|
|
E->getArg(0), false);
|
|
|
|
case Builtin::BI__builtin_operator_delete:
|
|
|
|
return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
|
|
|
|
E->getArg(0), true);
|
2012-10-14 06:30:41 +08:00
|
|
|
case Builtin::BI__noop:
|
2014-07-12 04:22:55 +08:00
|
|
|
// __noop always evaluates to an integer literal zero.
|
|
|
|
return RValue::get(ConstantInt::get(IntTy, 0));
|
2014-12-13 07:41:25 +08:00
|
|
|
case Builtin::BI__builtin_call_with_static_chain: {
|
|
|
|
const CallExpr *Call = cast<CallExpr>(E->getArg(0));
|
|
|
|
const Expr *Chain = E->getArg(1);
|
|
|
|
return EmitCall(Call->getCallee()->getType(),
|
|
|
|
EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
|
|
|
|
Call->getCalleeDecl(), EmitScalarExpr(Chain));
|
|
|
|
}
|
2014-06-19 04:51:10 +08:00
|
|
|
case Builtin::BI_InterlockedExchange:
|
|
|
|
case Builtin::BI_InterlockedExchangePointer:
|
|
|
|
return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
|
|
|
|
case Builtin::BI_InterlockedCompareExchangePointer: {
|
|
|
|
llvm::Type *RTy;
|
|
|
|
llvm::IntegerType *IntType =
|
|
|
|
IntegerType::get(getLLVMContext(),
|
|
|
|
getContext().getTypeSize(E->getType()));
|
|
|
|
llvm::Type *IntPtrType = IntType->getPointerTo();
|
|
|
|
|
|
|
|
llvm::Value *Destination =
|
|
|
|
Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
|
|
|
|
|
|
|
|
llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
|
|
|
|
RTy = Exchange->getType();
|
|
|
|
Exchange = Builder.CreatePtrToInt(Exchange, IntType);
|
|
|
|
|
|
|
|
llvm::Value *Comparand =
|
|
|
|
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
|
|
|
|
|
|
|
|
auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
|
|
|
|
SequentiallyConsistent,
|
|
|
|
SequentiallyConsistent);
|
|
|
|
Result->setVolatile(true);
|
|
|
|
|
|
|
|
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
|
|
|
|
0),
|
|
|
|
RTy));
|
|
|
|
}
|
2014-02-22 07:08:53 +08:00
|
|
|
case Builtin::BI_InterlockedCompareExchange: {
|
|
|
|
AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
|
|
|
|
EmitScalarExpr(E->getArg(0)),
|
|
|
|
EmitScalarExpr(E->getArg(2)),
|
|
|
|
EmitScalarExpr(E->getArg(1)),
|
2014-03-11 18:49:03 +08:00
|
|
|
SequentiallyConsistent,
|
2014-02-22 07:08:53 +08:00
|
|
|
SequentiallyConsistent);
|
|
|
|
CXI->setVolatile(true);
|
2014-06-13 22:24:59 +08:00
|
|
|
return RValue::get(Builder.CreateExtractValue(CXI, 0));
|
2014-02-22 07:08:53 +08:00
|
|
|
}
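// Sketch of the emitted IR (illustrative): for
//   long prev = _InterlockedCompareExchange(&dest, exchange, comparand);
// the code above produces roughly
//   %pair = cmpxchg volatile i32* %dest, i32 %comparand, i32 %exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0
// i.e. the previous value is returned, matching the MSVC semantics.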
|
|
|
|
case Builtin::BI_InterlockedIncrement: {
|
|
|
|
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
|
|
|
|
AtomicRMWInst::Add,
|
|
|
|
EmitScalarExpr(E->getArg(0)),
|
|
|
|
ConstantInt::get(Int32Ty, 1),
|
|
|
|
llvm::SequentiallyConsistent);
|
|
|
|
RMWI->setVolatile(true);
|
|
|
|
return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
|
|
|
|
}
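// Illustrative note: atomicrmw add returns the *old* value, so the extra
// CreateAdd above adjusts it to the MSVC semantics, where
//   long v = _InterlockedIncrement(&x);
// yields the incremented value. Roughly:
//   %old = atomicrmw volatile add i32* %x, i32 1 seq_cst
//   %v   = add i32 %old, 1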
|
|
|
|
case Builtin::BI_InterlockedDecrement: {
|
|
|
|
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
|
|
|
|
AtomicRMWInst::Sub,
|
|
|
|
EmitScalarExpr(E->getArg(0)),
|
|
|
|
ConstantInt::get(Int32Ty, 1),
|
|
|
|
llvm::SequentiallyConsistent);
|
|
|
|
RMWI->setVolatile(true);
|
|
|
|
return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
|
|
|
|
}
|
|
|
|
case Builtin::BI_InterlockedExchangeAdd: {
|
|
|
|
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
|
|
|
|
AtomicRMWInst::Add,
|
|
|
|
EmitScalarExpr(E->getArg(0)),
|
|
|
|
EmitScalarExpr(E->getArg(1)),
|
|
|
|
llvm::SequentiallyConsistent);
|
|
|
|
RMWI->setVolatile(true);
|
|
|
|
return RValue::get(RMWI);
|
|
|
|
}
|
2014-10-30 00:35:41 +08:00
|
|
|
case Builtin::BI__readfsdword: {
|
|
|
|
Value *IntToPtr =
|
|
|
|
Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
|
|
|
|
llvm::PointerType::get(CGM.Int32Ty, 257));
|
|
|
|
LoadInst *Load =
|
|
|
|
Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
|
|
|
|
return RValue::get(Load);
|
|
|
|
}
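// Illustrative sketch (assuming the usual x86 segment mapping in LLVM, where
// address space 257 denotes FS): a call such as
//   unsigned long v = __readfsdword(0x18);
// becomes roughly
//   %p = inttoptr i32 24 to i32 addrspace(257)*
//   %v = load volatile i32, i32 addrspace(257)* %p, align 4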
|
|
|
|
|
|
|
|
case Builtin::BI__exception_code:
|
|
|
|
case Builtin::BI_exception_code:
|
|
|
|
return RValue::get(EmitSEHExceptionCode());
|
|
|
|
case Builtin::BI__exception_info:
|
|
|
|
case Builtin::BI_exception_info:
|
|
|
|
return RValue::get(EmitSEHExceptionInfo());
|
2015-02-05 06:37:07 +08:00
|
|
|
case Builtin::BI__abnormal_termination:
|
|
|
|
case Builtin::BI_abnormal_termination:
|
|
|
|
return RValue::get(EmitSEHAbnormalTermination());
|
2015-01-29 17:29:21 +08:00
|
|
|
case Builtin::BI_setjmpex: {
|
|
|
|
if (getTarget().getTriple().isOSMSVCRT()) {
|
|
|
|
llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
|
|
|
|
llvm::AttributeSet ReturnsTwiceAttr =
|
|
|
|
AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
|
|
|
|
llvm::Attribute::ReturnsTwice);
|
|
|
|
llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
|
|
|
|
llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
|
|
|
|
"_setjmpex", ReturnsTwiceAttr);
|
2015-03-21 01:03:35 +08:00
|
|
|
llvm::Value *Buf = Builder.CreateBitOrPointerCast(
|
|
|
|
EmitScalarExpr(E->getArg(0)), Int8PtrTy);
|
2015-01-29 17:29:21 +08:00
|
|
|
llvm::Value *FrameAddr =
|
|
|
|
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
|
|
|
|
ConstantInt::get(Int32Ty, 0));
|
|
|
|
llvm::Value *Args[] = {Buf, FrameAddr};
|
|
|
|
llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
|
|
|
|
CS.setAttributes(ReturnsTwiceAttr);
|
|
|
|
return RValue::get(CS.getInstruction());
|
|
|
|
}
|
2015-03-21 01:03:35 +08:00
|
|
|
break;
|
2015-01-29 17:29:21 +08:00
|
|
|
}
|
|
|
|
case Builtin::BI_setjmp: {
|
|
|
|
if (getTarget().getTriple().isOSMSVCRT()) {
|
|
|
|
llvm::AttributeSet ReturnsTwiceAttr =
|
|
|
|
AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
|
|
|
|
llvm::Attribute::ReturnsTwice);
|
2015-03-21 01:03:35 +08:00
|
|
|
llvm::Value *Buf = Builder.CreateBitOrPointerCast(
|
|
|
|
EmitScalarExpr(E->getArg(0)), Int8PtrTy);
|
2015-01-29 17:29:21 +08:00
|
|
|
llvm::CallSite CS;
|
|
|
|
if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
|
|
|
|
llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
|
|
|
|
llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
|
|
|
|
llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
|
|
|
|
"_setjmp3", ReturnsTwiceAttr);
|
|
|
|
llvm::Value *Count = ConstantInt::get(IntTy, 0);
|
|
|
|
llvm::Value *Args[] = {Buf, Count};
|
|
|
|
CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
|
|
|
|
} else {
|
|
|
|
llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
|
|
|
|
llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
|
|
|
|
llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
|
|
|
|
"_setjmp", ReturnsTwiceAttr);
|
|
|
|
llvm::Value *FrameAddr =
|
|
|
|
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
|
|
|
|
ConstantInt::get(Int32Ty, 0));
|
|
|
|
llvm::Value *Args[] = {Buf, FrameAddr};
|
|
|
|
CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
|
|
|
|
}
|
|
|
|
CS.setAttributes(ReturnsTwiceAttr);
|
|
|
|
return RValue::get(CS.getInstruction());
|
|
|
|
}
|
2015-03-21 01:03:35 +08:00
|
|
|
break;
|
2015-01-29 17:29:21 +08:00
|
|
|
}
|
2015-03-14 02:26:17 +08:00
|
|
|
|
|
|
|
case Builtin::BI__GetExceptionInfo: {
|
|
|
|
if (llvm::GlobalVariable *GV =
|
|
|
|
CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
|
|
|
|
return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
|
|
|
|
break;
|
|
|
|
}
|
2008-05-15 15:38:03 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-09-14 07:05:03 +08:00
|
|
|
// If this is an alias for a lib function (e.g. __builtin_sin), emit
|
|
|
|
// the call using the normal call path, but using the unmangled
|
|
|
|
// version of the function name.
|
|
|
|
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
|
|
|
|
return emitLibraryCall(*this, FD, E,
|
|
|
|
CGM.getBuiltinLibFunction(FD, BuiltinID));
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2011-09-14 07:05:03 +08:00
|
|
|
// If this is a predefined lib function (e.g. malloc), emit the call
|
|
|
|
// using exactly the normal call path.
|
|
|
|
if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
|
|
|
|
return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
// See if we have a target specific intrinsic.
|
2015-08-06 09:01:12 +08:00
|
|
|
const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
|
2009-08-24 17:54:37 +08:00
|
|
|
Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
|
|
|
|
if (const char *Prefix =
|
2014-07-05 05:49:39 +08:00
|
|
|
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
|
2009-08-24 17:54:37 +08:00
|
|
|
IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
|
2014-07-05 05:49:39 +08:00
|
|
|
// NOTE: We don't need to perform a compatibility flag check here, since the
// MS builtin intrinsics are declared in Builtins*.def via LANGBUILTIN with
// ALL_MS_LANGUAGES and are filtered earlier.
|
|
|
|
if (IntrinsicID == Intrinsic::not_intrinsic)
|
|
|
|
IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
|
|
|
|
}
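// For illustration (hypothetical mapping, not verified against the .td files):
// with Prefix == "x86", a GCC-style builtin such as __builtin_ia32_pause would
// resolve via its GCCBuiltin annotation to the matching x86 intrinsic ID, and
// the generic path below then simply forwards the call arguments to it.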
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
if (IntrinsicID != Intrinsic::not_intrinsic) {
|
|
|
|
SmallVector<Value*, 16> Args;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-10-02 08:09:12 +08:00
|
|
|
// Find out if any arguments are required to be integer constant
|
|
|
|
// expressions.
|
|
|
|
unsigned ICEArguments = 0;
|
|
|
|
ASTContext::GetBuiltinTypeError Error;
|
|
|
|
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
|
|
|
|
assert(Error == ASTContext::GE_None && "Should not codegen an error");
|
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
Function *F = CGM.getIntrinsic(IntrinsicID);
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::FunctionType *FTy = F->getFunctionType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
|
2010-10-02 08:09:12 +08:00
|
|
|
Value *ArgValue;
|
|
|
|
// If this is a normal argument, just emit it as a scalar.
|
|
|
|
if ((ICEArguments & (1 << i)) == 0) {
|
|
|
|
ArgValue = EmitScalarExpr(E->getArg(i));
|
|
|
|
} else {
|
2012-09-21 08:18:27 +08:00
|
|
|
// If this is required to be a constant, constant fold it so that we
|
2010-10-02 08:09:12 +08:00
|
|
|
// know that the generated intrinsic gets a ConstantInt.
|
|
|
|
llvm::APSInt Result;
|
|
|
|
bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
|
|
|
|
assert(IsConst && "Constant arg isn't actually constant?");
|
|
|
|
(void)IsConst;
|
2011-02-08 16:22:06 +08:00
|
|
|
ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
|
2010-10-02 08:09:12 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
// If the intrinsic arg type is different from the builtin arg type,
// we need to do a bit cast.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *PTy = FTy->getParamType(i);
|
2008-07-01 02:32:54 +08:00
|
|
|
if (PTy != ArgValue->getType()) {
|
|
|
|
assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
       "Must be able to losslessly bit cast to param");
|
|
|
|
ArgValue = Builder.CreateBitCast(ArgValue, PTy);
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
Args.push_back(ArgValue);
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-07-15 16:37:34 +08:00
|
|
|
Value *V = Builder.CreateCall(F, Args);
|
2008-07-01 02:32:54 +08:00
|
|
|
QualType BuiltinRetType = E->getType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-02-07 08:39:47 +08:00
|
|
|
llvm::Type *RetTy = VoidTy;
|
2012-09-21 08:18:27 +08:00
|
|
|
if (!BuiltinRetType->isVoidType())
|
2012-02-07 08:39:47 +08:00
|
|
|
RetTy = ConvertType(BuiltinRetType);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
if (RetTy != V->getType()) {
|
|
|
|
assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
|
|
|
|
"Must be able to losslessly bit cast result type");
|
|
|
|
V = Builder.CreateBitCast(V, RetTy);
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
return RValue::get(V);
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
// See if we have a target specific builtin that needs to be lowered.
|
2008-10-10 08:24:54 +08:00
|
|
|
if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
|
2008-07-01 02:32:54 +08:00
|
|
|
return RValue::get(V);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-08-16 08:56:44 +08:00
|
|
|
ErrorUnsupported(E, "builtin function");
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-07-01 02:32:54 +08:00
|
|
|
// Unknown builtin: we've emitted an "unsupported" diagnostic above, so just
// return undef.
|
2013-03-08 05:37:08 +08:00
|
|
|
return GetUndefRValue(E->getType());
|
2009-09-09 23:08:12 +08:00
|
|
|
}
|
2007-12-10 07:17:02 +08:00
|
|
|
|
2015-09-23 01:23:22 +08:00
|
|
|
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
|
|
|
|
unsigned BuiltinID, const CallExpr *E,
|
|
|
|
llvm::Triple::ArchType Arch) {
|
|
|
|
switch (Arch) {
|
2010-03-04 03:03:45 +08:00
|
|
|
case llvm::Triple::arm:
|
2014-03-28 22:40:46 +08:00
|
|
|
case llvm::Triple::armeb:
|
2010-03-04 03:03:45 +08:00
|
|
|
case llvm::Triple::thumb:
|
2014-03-28 22:40:46 +08:00
|
|
|
case llvm::Triple::thumbeb:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitARMBuiltinExpr(BuiltinID, E);
|
2014-05-24 20:51:25 +08:00
|
|
|
case llvm::Triple::aarch64:
|
|
|
|
case llvm::Triple::aarch64_be:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
|
2009-08-24 17:54:37 +08:00
|
|
|
case llvm::Triple::x86:
|
|
|
|
case llvm::Triple::x86_64:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitX86BuiltinExpr(BuiltinID, E);
|
2009-08-24 17:54:37 +08:00
|
|
|
case llvm::Triple::ppc:
|
|
|
|
case llvm::Triple::ppc64:
|
2013-07-26 09:36:11 +08:00
|
|
|
case llvm::Triple::ppc64le:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
|
2014-06-25 04:45:01 +08:00
|
|
|
case llvm::Triple::r600:
|
2015-01-07 04:34:47 +08:00
|
|
|
case llvm::Triple::amdgcn:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
|
2015-04-01 20:54:25 +08:00
|
|
|
case llvm::Triple::systemz:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
|
2015-06-26 02:29:42 +08:00
|
|
|
case llvm::Triple::nvptx:
|
|
|
|
case llvm::Triple::nvptx64:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
|
2015-09-04 06:51:53 +08:00
|
|
|
case llvm::Triple::wasm32:
|
|
|
|
case llvm::Triple::wasm64:
|
2015-09-23 01:23:22 +08:00
|
|
|
return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
|
2009-08-24 17:54:37 +08:00
|
|
|
default:
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2009-08-24 17:54:37 +08:00
|
|
|
}
|
2008-10-10 08:24:54 +08:00
|
|
|
}
|
|
|
|
|
2015-09-23 01:23:22 +08:00
|
|
|
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
|
|
|
|
const CallExpr *E) {
|
|
|
|
if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
|
|
|
|
assert(getContext().getAuxTargetInfo() && "Missing aux target info");
|
|
|
|
return EmitTargetArchBuiltinExpr(
|
|
|
|
this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
|
|
|
|
getContext().getAuxTargetInfo()->getTriple().getArch());
|
|
|
|
}
|
|
|
|
|
|
|
|
return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
|
|
|
|
getTarget().getTriple().getArch());
|
|
|
|
}
|
|
|
|
|
2012-02-07 08:39:47 +08:00
|
|
|
static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
|
2013-09-24 10:48:06 +08:00
|
|
|
NeonTypeFlags TypeFlags,
|
|
|
|
bool V1Ty=false) {
|
2011-11-08 11:27:04 +08:00
|
|
|
int IsQuad = TypeFlags.isQuad();
|
|
|
|
switch (TypeFlags.getEltType()) {
|
2011-11-08 09:16:11 +08:00
|
|
|
case NeonTypeFlags::Int8:
|
|
|
|
case NeonTypeFlags::Poly8:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
|
2011-11-08 09:16:11 +08:00
|
|
|
case NeonTypeFlags::Int16:
|
|
|
|
case NeonTypeFlags::Poly16:
|
|
|
|
case NeonTypeFlags::Float16:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
|
2011-11-08 09:16:11 +08:00
|
|
|
case NeonTypeFlags::Int32:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
|
2011-11-08 09:16:11 +08:00
|
|
|
case NeonTypeFlags::Int64:
|
2013-11-14 11:29:16 +08:00
|
|
|
case NeonTypeFlags::Poly64:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
|
2013-12-10 14:49:01 +08:00
|
|
|
case NeonTypeFlags::Poly128:
|
|
|
|
// FIXME: i128 and f128 are not yet fully supported in Clang and LLVM;
// much of the i128/f128 API is missing, so we use v16i8 to represent
// poly128 and rely on pattern matching.
|
|
|
|
return llvm::VectorType::get(CGF->Int8Ty, 16);
|
2011-11-08 09:16:11 +08:00
|
|
|
case NeonTypeFlags::Float32:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
|
2013-08-01 17:23:19 +08:00
|
|
|
case NeonTypeFlags::Float64:
|
2013-09-24 10:48:06 +08:00
|
|
|
return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
|
2012-01-17 10:30:50 +08:00
|
|
|
}
|
2013-09-27 00:36:08 +08:00
|
|
|
llvm_unreachable("Unknown vector element type!");
|
2010-06-08 00:01:56 +08:00
|
|
|
}
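// Illustrative mapping (a sketch of what the switch above yields):
//   NeonTypeFlags(Int32,   quad=false) -> <2 x i32>
//   NeonTypeFlags(Int32,   quad=true)  -> <4 x i32>
//   NeonTypeFlags(Float64, quad=true)  -> <2 x double>
// i.e. the quad bit doubles the lane count for each element type.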
|
|
|
|
|
2015-08-25 07:41:31 +08:00
|
|
|
static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
|
|
|
|
NeonTypeFlags IntTypeFlags) {
|
|
|
|
int IsQuad = IntTypeFlags.isQuad();
|
|
|
|
switch (IntTypeFlags.getEltType()) {
|
|
|
|
case NeonTypeFlags::Int32:
|
|
|
|
return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
|
|
|
|
case NeonTypeFlags::Int64:
|
|
|
|
return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Type can't be converted to floating-point!");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-12-08 06:40:02 +08:00
|
|
|
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
|
2010-06-10 08:17:56 +08:00
|
|
|
unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
|
2012-01-25 13:34:41 +08:00
|
|
|
Value* SV = llvm::ConstantVector::getSplat(nElts, C);
|
2010-06-10 08:17:56 +08:00
|
|
|
return Builder.CreateShuffleVector(V, V, SV, "lane");
|
|
|
|
}
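// Illustrative IR (sketch): splatting lane C of a <4 x i32> vector V produces
//   %lane = shufflevector <4 x i32> %V, <4 x i32> %V,
//                         <4 x i32> <i32 C, i32 C, i32 C, i32 C>
// since ConstantVector::getSplat builds the all-C shuffle mask.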
|
|
|
|
|
2010-06-08 14:03:01 +08:00
|
|
|
Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
|
2010-12-09 06:37:56 +08:00
|
|
|
const char *name,
|
2010-06-14 13:21:25 +08:00
|
|
|
unsigned shift, bool rightshift) {
|
2010-06-08 14:03:01 +08:00
|
|
|
unsigned j = 0;
|
|
|
|
for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
|
|
|
|
ai != ae; ++ai, ++j)
|
2010-06-14 13:21:25 +08:00
|
|
|
if (shift > 0 && shift == j)
|
|
|
|
Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
|
|
|
|
else
|
|
|
|
Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
|
2010-06-08 14:03:01 +08:00
|
|
|
|
2011-07-15 16:37:34 +08:00
|
|
|
return Builder.CreateCall(F, Ops, name);
|
2010-06-08 14:03:01 +08:00
|
|
|
}
|
|
|
|
|
2012-09-21 08:18:27 +08:00
|
|
|
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
|
2010-06-12 06:57:12 +08:00
|
|
|
bool neg) {
|
2012-01-25 13:34:41 +08:00
|
|
|
int SV = cast<ConstantInt>(V)->getSExtValue();
|
2015-07-28 23:40:11 +08:00
|
|
|
return ConstantInt::get(Ty, neg ? -SV : SV);
|
2010-06-12 06:57:12 +08:00
|
|
|
}
|
|
|
|
|
2013-10-04 21:13:15 +08:00
|
|
|
/// \brief Right-shift a vector by a constant.
|
|
|
|
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
|
|
|
|
llvm::Type *Ty, bool usgn,
|
|
|
|
const char *name) {
|
|
|
|
llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
|
|
|
|
|
|
|
|
int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
|
|
|
|
int EltSize = VTy->getScalarSizeInBits();
|
|
|
|
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty);
|
|
|
|
|
|
|
|
// lshr/ashr are undefined when the shift amount is equal to the vector
|
|
|
|
// element size.
|
|
|
|
if (ShiftAmt == EltSize) {
|
|
|
|
if (usgn) {
|
|
|
|
// Right-shifting an unsigned value by its size yields 0.
|
2015-07-28 23:40:11 +08:00
|
|
|
return llvm::ConstantAggregateZero::get(VTy);
|
2013-10-04 21:13:15 +08:00
|
|
|
} else {
|
|
|
|
// Right-shifting a signed value by its size is equivalent
|
|
|
|
// to a shift of size-1.
|
|
|
|
--ShiftAmt;
|
|
|
|
Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Shift = EmitNeonShiftVector(Shift, Ty, false);
|
|
|
|
if (usgn)
|
|
|
|
return Builder.CreateLShr(Vec, Shift, name);
|
|
|
|
else
|
|
|
|
return Builder.CreateAShr(Vec, Shift, name);
|
|
|
|
}
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
enum {
|
|
|
|
AddRetType = (1 << 0),
|
|
|
|
Add1ArgType = (1 << 1),
|
|
|
|
Add2ArgTypes = (1 << 2),
|
|
|
|
|
|
|
|
VectorizeRetType = (1 << 3),
|
|
|
|
VectorizeArgTypes = (1 << 4),
|
|
|
|
|
|
|
|
InventFloatType = (1 << 5),
|
|
|
|
UnsignedAlts = (1 << 6),
|
|
|
|
|
2014-03-29 23:09:45 +08:00
|
|
|
Use64BitVectors = (1 << 7),
|
|
|
|
Use128BitVectors = (1 << 8),
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
|
|
|
|
VectorRet = AddRetType | VectorizeRetType,
|
|
|
|
VectorRetGetArgs01 =
|
|
|
|
AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
|
|
|
|
FpCmpzModifiers =
|
2014-02-21 20:16:59 +08:00
|
|
|
AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
|
2014-02-21 19:57:24 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct NeonIntrinsicInfo {
|
|
|
|
unsigned BuiltinID;
|
|
|
|
unsigned LLVMIntrinsic;
|
|
|
|
unsigned AltLLVMIntrinsic;
|
|
|
|
const char *NameHint;
|
|
|
|
unsigned TypeModifier;
|
|
|
|
|
|
|
|
bool operator<(unsigned RHSBuiltinID) const {
|
|
|
|
return BuiltinID < RHSBuiltinID;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#define NEONMAP0(NameBase) \
|
|
|
|
{ NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }
|
|
|
|
|
|
|
|
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
|
|
|
|
{ NEON::BI__builtin_neon_ ## NameBase, \
|
|
|
|
Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }
|
|
|
|
|
|
|
|
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
|
|
|
|
{ NEON::BI__builtin_neon_ ## NameBase, \
|
|
|
|
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
|
|
|
|
#NameBase, TypeModifier }
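// For illustration, an entry such as
//   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts)
// expands to
//   { NEON::BI__builtin_neon_vhadd_v, Intrinsic::arm_neon_vhaddu,
//     Intrinsic::arm_neon_vhadds, "vhadd_v", Add1ArgType | UnsignedAlts }
// giving both the unsigned and signed intrinsic choices for the map lookup.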
|
|
|
|
|
|
|
|
static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
|
|
|
|
NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vabs_v, arm_neon_vabs, 0),
|
|
|
|
NEONMAP1(vabsq_v, arm_neon_vabs, 0),
|
|
|
|
NEONMAP0(vaddhn_v),
|
|
|
|
NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
|
|
|
|
NEONMAP1(vaeseq_v, arm_neon_aese, 0),
|
|
|
|
NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
|
|
|
|
NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
|
|
|
|
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
|
|
|
|
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
|
|
|
|
NEONMAP1(vcage_v, arm_neon_vacge, 0),
|
|
|
|
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
|
|
|
|
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
|
|
|
|
NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
|
|
|
|
NEONMAP1(vcale_v, arm_neon_vacge, 0),
|
|
|
|
NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
|
|
|
|
NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
|
|
|
|
NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
|
|
|
|
NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
|
|
|
|
NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
|
|
|
|
NEONMAP1(vclz_v, ctlz, Add1ArgType),
|
|
|
|
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
|
|
|
|
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
|
|
|
|
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
|
2015-08-22 07:34:20 +08:00
|
|
|
NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
|
|
|
|
NEONMAP0(vcvt_f32_v),
|
|
|
|
NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP0(vcvt_s32_v),
|
|
|
|
NEONMAP0(vcvt_s64_v),
|
|
|
|
NEONMAP0(vcvt_u32_v),
|
|
|
|
NEONMAP0(vcvt_u64_v),
|
|
|
|
NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
|
|
|
|
NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
|
|
|
|
NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
|
|
|
|
NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
|
|
|
|
NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
|
|
|
|
NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
|
|
|
|
NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
|
|
|
|
NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
|
|
|
|
NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
|
|
|
|
NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
|
|
|
|
NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
|
|
|
|
NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
|
|
|
|
NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
|
|
|
|
NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
|
|
|
|
NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
|
|
|
|
NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
|
|
|
|
NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
|
|
|
|
NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
|
|
|
|
NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
|
|
|
|
NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
|
|
|
|
NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
|
|
|
|
NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
|
|
|
|
NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
|
|
|
|
NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
|
|
|
|
NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
|
|
|
|
NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
|
|
|
|
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
|
|
|
|
NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
|
|
|
|
NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
|
|
|
|
NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
|
|
|
|
NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
|
|
|
|
NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
|
|
|
|
NEONMAP0(vcvtq_f32_v),
|
|
|
|
NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP0(vcvtq_s32_v),
|
|
|
|
NEONMAP0(vcvtq_s64_v),
|
|
|
|
NEONMAP0(vcvtq_u32_v),
|
|
|
|
NEONMAP0(vcvtq_u64_v),
|
|
|
|
NEONMAP0(vext_v),
|
|
|
|
NEONMAP0(vextq_v),
|
|
|
|
NEONMAP0(vfma_v),
|
|
|
|
NEONMAP0(vfmaq_v),
|
|
|
|
NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP0(vld1_dup_v),
|
|
|
|
NEONMAP1(vld1_v, arm_neon_vld1, 0),
|
|
|
|
NEONMAP0(vld1q_dup_v),
|
|
|
|
NEONMAP1(vld1q_v, arm_neon_vld1, 0),
|
|
|
|
NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
|
|
|
|
NEONMAP1(vld2_v, arm_neon_vld2, 0),
|
|
|
|
NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
|
|
|
|
NEONMAP1(vld2q_v, arm_neon_vld2, 0),
|
|
|
|
NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
|
|
|
|
NEONMAP1(vld3_v, arm_neon_vld3, 0),
|
|
|
|
NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
|
|
|
|
NEONMAP1(vld3q_v, arm_neon_vld3, 0),
|
|
|
|
NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
|
|
|
|
NEONMAP1(vld4_v, arm_neon_vld4, 0),
|
|
|
|
NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
|
|
|
|
NEONMAP1(vld4q_v, arm_neon_vld4, 0),
|
|
|
|
NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
|
2014-09-05 21:50:34 +08:00
|
|
|
NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
|
|
|
|
NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
|
2014-09-05 21:50:34 +08:00
|
|
|
NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
|
|
|
|
NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP0(vmovl_v),
|
|
|
|
NEONMAP0(vmovn_v),
|
|
|
|
NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
|
|
|
|
NEONMAP0(vmull_v),
|
|
|
|
NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
|
|
|
|
NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
|
|
|
|
NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
|
|
|
|
NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
|
|
|
|
NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
|
|
|
|
NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
|
|
|
|
NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
|
|
|
|
NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
|
|
|
|
NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
|
|
|
|
NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
|
|
|
|
NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
|
|
|
|
NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
|
|
|
|
NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
|
|
|
|
NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
|
|
|
|
NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
|
|
|
|
NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
|
|
|
|
NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
|
2014-07-29 17:25:17 +08:00
|
|
|
NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
|
|
|
|
NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
|
|
|
|
NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
|
|
|
|
NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
|
|
|
|
NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
|
|
|
|
NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
|
|
|
|
NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
|
2014-09-05 21:50:34 +08:00
|
|
|
NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
|
|
|
|
NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
|
|
|
|
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
|
|
|
|
NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
|
|
|
|
NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
|
|
|
|
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
|
|
|
|
NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
|
|
|
|
NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
|
|
|
|
NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
|
|
|
|
NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
|
|
|
|
NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
|
|
|
|
NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
|
2014-07-29 17:25:17 +08:00
|
|
|
NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
|
|
|
|
NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
|
2014-02-21 19:57:24 +08:00
|
|
|
NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
|
|
|
|
NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
|
|
|
|
NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
|
|
|
|
NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
|
|
|
|
NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
|
|
|
|
NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
|
|
|
|
NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
|
|
|
|
NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
|
|
|
|
NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
|
|
|
|
NEONMAP0(vshl_n_v),
|
|
|
|
NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP0(vshll_n_v),
|
|
|
|
NEONMAP0(vshlq_n_v),
|
|
|
|
NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP0(vshr_n_v),
|
|
|
|
NEONMAP0(vshrn_n_v),
|
|
|
|
NEONMAP0(vshrq_n_v),
|
|
|
|
NEONMAP1(vst1_v, arm_neon_vst1, 0),
|
|
|
|
NEONMAP1(vst1q_v, arm_neon_vst1, 0),
|
|
|
|
NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
|
|
|
|
NEONMAP1(vst2_v, arm_neon_vst2, 0),
|
|
|
|
NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
|
|
|
|
NEONMAP1(vst2q_v, arm_neon_vst2, 0),
|
|
|
|
NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
|
|
|
|
NEONMAP1(vst3_v, arm_neon_vst3, 0),
|
|
|
|
NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
|
|
|
|
NEONMAP1(vst3q_v, arm_neon_vst3, 0),
|
|
|
|
NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
|
|
|
|
NEONMAP1(vst4_v, arm_neon_vst4, 0),
|
|
|
|
NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
|
|
|
|
NEONMAP1(vst4q_v, arm_neon_vst4, 0),
|
|
|
|
NEONMAP0(vsubhn_v),
|
|
|
|
NEONMAP0(vtrn_v),
|
|
|
|
NEONMAP0(vtrnq_v),
|
|
|
|
NEONMAP0(vtst_v),
|
|
|
|
NEONMAP0(vtstq_v),
|
|
|
|
NEONMAP0(vuzp_v),
|
|
|
|
NEONMAP0(vuzpq_v),
|
|
|
|
NEONMAP0(vzip_v),
|
2014-02-21 20:16:59 +08:00
|
|
|
NEONMAP0(vzipq_v)
|
2014-02-21 19:57:24 +08:00
|
|
|
};
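// A quick illustration of how these tables are consumed (sketch only, based on
// the NEONMAP* macros defined earlier in this file and on
// EmitCommonNeonBuiltinExpr below): a two-intrinsic entry such as
//   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs,
//            Add1ArgType | UnsignedAlts)
// records an unsigned and a signed LLVM intrinsic for one overloaded builtin.
// Because UnsignedAlts is set, the shared emitter starts from the first
// (unsigned) intrinsic and switches to the second one when the NEON type flags
// say the element type is signed.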
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
|
|
|
|
NEONMAP1(vabs_v, aarch64_neon_abs, 0),
|
|
|
|
NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vaddhn_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
|
|
|
|
NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
|
|
|
|
NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
|
|
|
|
NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
|
|
|
|
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
|
|
|
|
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
|
|
|
|
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
|
|
|
|
NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
|
|
|
|
NEONMAP1(vcale_v, aarch64_neon_facge, 0),
|
|
|
|
NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
|
|
|
|
NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
|
|
|
|
NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
|
|
|
|
NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
|
|
|
|
NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP1(vclz_v, ctlz, Add1ArgType),
|
|
|
|
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
|
|
|
|
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
|
|
|
|
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
|
2015-08-22 07:34:20 +08:00
|
|
|
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vcvt_f32_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vcvtq_f32_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
|
|
|
|
NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
|
|
|
|
NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
|
|
|
|
NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vext_v),
|
|
|
|
NEONMAP0(vextq_v),
|
|
|
|
NEONMAP0(vfma_v),
|
|
|
|
NEONMAP0(vfmaq_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vmovl_v),
|
|
|
|
NEONMAP0(vmovn_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
|
|
|
|
NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
|
|
|
|
NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
|
|
|
|
NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
|
|
|
|
NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
|
|
|
|
NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
|
|
|
|
NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
|
|
|
|
NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
|
|
|
|
NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
|
|
|
|
NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
|
|
|
|
NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
|
|
|
|
NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
|
|
|
|
NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
|
|
|
|
NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
|
|
|
|
NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
|
|
|
|
NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
|
2014-07-29 17:25:17 +08:00
|
|
|
NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
|
|
|
|
NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
|
|
|
|
NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
|
|
|
|
NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
|
|
|
|
NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
|
|
|
|
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
|
|
|
|
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
|
|
|
|
NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
|
2014-07-29 17:25:17 +08:00
|
|
|
NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
|
|
|
|
NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
|
|
|
|
NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
|
|
|
|
NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
|
|
|
|
NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
|
|
|
|
NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
|
|
|
|
NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
|
|
|
|
NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
|
|
|
|
NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
|
|
|
|
NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vshl_n_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vshll_n_v),
|
|
|
|
NEONMAP0(vshlq_n_v),
|
2014-05-24 20:52:07 +08:00
|
|
|
NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
|
2014-03-29 23:09:45 +08:00
|
|
|
NEONMAP0(vshr_n_v),
|
|
|
|
NEONMAP0(vshrn_n_v),
|
|
|
|
NEONMAP0(vshrq_n_v),
|
|
|
|
NEONMAP0(vsubhn_v),
|
|
|
|
NEONMAP0(vtst_v),
|
|
|
|
NEONMAP0(vtstq_v),
|
|
|
|
};
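// Note (illustrative): several AArch64 entries map a builtin straight onto a
// target-independent intrinsic. For instance, with
//   NEONMAP1(vclzq_v, ctlz, Add1ArgType)
// a call like vclzq_s32(a) comes out roughly as
//   %r = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// where the trailing "i1 false" (CLZ of zero is defined) is appended by the
// vclz handling in EmitCommonNeonBuiltinExpr below.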
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
|
|
|
|
NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
|
|
|
|
NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
|
|
|
|
NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
|
|
|
|
NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
|
|
|
|
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
|
|
|
|
NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
|
|
|
|
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
|
|
|
|
NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
|
|
|
|
NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
|
|
|
|
NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
|
|
|
|
NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
|
|
|
|
NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
|
|
|
|
NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
|
|
|
|
NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
|
|
|
|
NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
|
|
|
|
NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
|
|
|
|
NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
|
|
|
|
NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
|
|
|
|
NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
|
|
|
|
NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
|
|
|
|
NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
|
|
|
|
NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
|
|
|
|
NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
|
|
|
|
NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
|
|
|
|
NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
|
|
|
|
NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
|
|
|
|
NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
|
|
|
|
NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
|
|
|
|
NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
|
|
|
|
NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
|
|
|
|
NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
|
|
|
|
NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
|
|
|
|
NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
|
|
|
|
NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
|
|
|
|
NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
|
|
|
|
NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
|
|
|
|
NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
|
|
|
|
NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
|
|
|
|
NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
|
|
|
|
NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
|
|
|
|
NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
|
|
|
|
NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
|
|
|
|
NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
|
|
|
|
NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
|
|
|
|
NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
|
|
|
|
NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
|
2014-03-29 23:09:45 +08:00
|
|
|
};
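// Reading these scalar (SISD) entries, as a sketch: plain Add1ArgType entries
// use genuinely scalar intrinsic overloads (e.g. vqaddd_s64 on i64), while
// entries tagged Vectorize1ArgType | Use64BitVectors name intrinsics that only
// exist in vector form, so the byte/halfword scalars are widened into a
// 64-bit vector before the call and narrowed again afterwards (see
// EmitCommonNeonSISDBuiltinExpr below).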
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
#undef NEONMAP0
|
|
|
|
#undef NEONMAP1
|
|
|
|
#undef NEONMAP2
|
|
|
|
|
|
|
|
static bool NEONSIMDIntrinsicsProvenSorted = false;
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
static bool AArch64SIMDIntrinsicsProvenSorted = false;
|
|
|
|
static bool AArch64SISDIntrinsicsProvenSorted = false;
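// These flags memoize the one-time (NDEBUG-only) check in
// findNeonIntrinsicInMap that each table really is sorted by BuiltinID, which
// is what makes the std::lower_bound lookup below valid.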
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
static const NeonIntrinsicInfo *
|
2014-06-29 07:22:23 +08:00
|
|
|
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
|
2014-02-21 19:57:24 +08:00
|
|
|
unsigned BuiltinID, bool &MapProvenSorted) {
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
if (!MapProvenSorted) {
|
|
|
|
// FIXME: use std::is_sorted now that C++11 is allowed here.
|
|
|
|
for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
|
|
|
|
assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
|
|
|
|
MapProvenSorted = true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
const NeonIntrinsicInfo *Builtin =
|
|
|
|
std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
|
|
|
|
|
|
|
|
if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
|
|
|
|
return Builtin;
|
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-02-21 19:57:24 +08:00
|
|
|
}
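// Typical call site (sketch only; the actual dispatch lives in the
// target-specific builtin emitters further down this file):
//   if (const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
//           AArch64SISDIntrinsicMap, BuiltinID,
//           AArch64SISDIntrinsicsProvenSorted))
//     return EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);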
|
|
|
|
|
|
|
|
Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
|
|
|
|
unsigned Modifier,
|
|
|
|
llvm::Type *ArgType,
|
|
|
|
const CallExpr *E) {
|
2014-03-29 23:09:45 +08:00
|
|
|
int VectorSize = 0;
|
|
|
|
if (Modifier & Use64BitVectors)
|
|
|
|
VectorSize = 64;
|
|
|
|
else if (Modifier & Use128BitVectors)
|
|
|
|
VectorSize = 128;
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
// Return type.
|
|
|
|
SmallVector<llvm::Type *, 3> Tys;
|
|
|
|
if (Modifier & AddRetType) {
|
2015-02-26 01:36:15 +08:00
|
|
|
llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
|
2014-02-21 19:57:24 +08:00
|
|
|
if (Modifier & VectorizeRetType)
|
2014-03-29 23:09:45 +08:00
|
|
|
Ty = llvm::VectorType::get(
|
|
|
|
Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
|
2014-02-21 19:57:24 +08:00
|
|
|
|
|
|
|
Tys.push_back(Ty);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Arguments.
|
2014-03-29 23:09:45 +08:00
|
|
|
if (Modifier & VectorizeArgTypes) {
|
|
|
|
int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
|
|
|
|
ArgType = llvm::VectorType::get(ArgType, Elts);
|
|
|
|
}
|
2014-02-21 19:57:24 +08:00
|
|
|
|
|
|
|
if (Modifier & (Add1ArgType | Add2ArgTypes))
|
|
|
|
Tys.push_back(ArgType);
|
|
|
|
|
|
|
|
if (Modifier & Add2ArgTypes)
|
|
|
|
Tys.push_back(ArgType);
|
|
|
|
|
|
|
|
if (Modifier & InventFloatType)
|
|
|
|
Tys.push_back(FloatTy);
|
|
|
|
|
|
|
|
return CGM.getIntrinsic(IntrinsicID, Tys);
|
|
|
|
}
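// Example (illustrative) of how the modifier bits drive the overload list: for
// vqmovnd_s64 the table entry names aarch64_neon_scalar_sqxtn with
// AddRetType | Add1ArgType, so Tys becomes { i32, i64 } and the function
// looked up is roughly @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64).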
|
|
|
|
|
2014-03-29 23:09:45 +08:00
|
|
|
static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
|
|
|
|
const NeonIntrinsicInfo &SISDInfo,
|
|
|
|
SmallVectorImpl<Value *> &Ops,
|
|
|
|
const CallExpr *E) {
|
2014-03-31 23:47:09 +08:00
|
|
|
unsigned BuiltinID = SISDInfo.BuiltinID;
|
2014-03-29 23:09:45 +08:00
|
|
|
unsigned Int = SISDInfo.LLVMIntrinsic;
|
|
|
|
unsigned Modifier = SISDInfo.TypeModifier;
|
|
|
|
const char *s = SISDInfo.NameHint;
|
|
|
|
|
2014-03-31 23:47:09 +08:00
|
|
|
switch (BuiltinID) {
|
|
|
|
case NEON::BI__builtin_neon_vcled_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcled_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcles_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcled_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_u64:
|
|
|
|
case NEON::BI__builtin_neon_vclts_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcales_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcaled_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcalts_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcaltd_f64:
|
|
|
|
// Only one direction of comparisons actually exists: cmle is actually a cmge
|
|
|
|
// with swapped operands. The table gives us the right intrinsic, but we
|
|
|
|
// still need to do the swap.
|
|
|
|
std::swap(Ops[0], Ops[1]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-03-29 23:09:45 +08:00
|
|
|
assert(Int && "Generic code assumes a valid intrinsic");
|
|
|
|
|
|
|
|
// Determine the type(s) of this overloaded AArch64 intrinsic.
|
|
|
|
const Expr *Arg = E->getArg(0);
|
|
|
|
llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
|
|
|
|
Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
|
|
|
|
|
|
|
|
int j = 0;
|
2014-05-31 08:22:12 +08:00
|
|
|
ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
|
2014-03-29 23:09:45 +08:00
|
|
|
for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
|
|
|
|
ai != ae; ++ai, ++j) {
|
|
|
|
llvm::Type *ArgTy = ai->getType();
|
|
|
|
if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
|
|
|
|
ArgTy->getPrimitiveSizeInBits())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
|
|
|
|
// The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
|
|
|
|
// it before inserting.
|
|
|
|
Ops[j] =
|
|
|
|
CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
|
|
|
|
Ops[j] =
|
|
|
|
CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
|
|
|
|
}
|
|
|
|
|
|
|
|
Value *Result = CGF.EmitNeonCall(F, Ops, s);
|
|
|
|
llvm::Type *ResultType = CGF.ConvertType(E->getType());
|
|
|
|
if (ResultType->getPrimitiveSizeInBits() <
|
|
|
|
Result->getType()->getPrimitiveSizeInBits())
|
|
|
|
return CGF.Builder.CreateExtractElement(Result, C0);
|
|
|
|
|
|
|
|
return CGF.Builder.CreateBitCast(Result, ResultType, s);
|
|
|
|
}
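// For example (illustrative), a saturating scalar add such as vqaddh_s16 goes
// through the Vectorize1ArgType | Use64BitVectors path above and is emitted
// roughly as:
//   %a.v = insertelement <4 x i16> undef, i16 %a, i64 0
//   %b.v = insertelement <4 x i16> undef, i16 %b, i64 0
//   %r.v = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a.v,
//                                                        <4 x i16> %b.v)
//   %r   = extractelement <4 x i16> %r.v, i64 0
// Only lane 0 of the vectors carries meaningful data.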
|
2014-02-21 19:57:24 +08:00
|
|
|
|
|
|
|
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
|
|
|
|
const char *NameHint, unsigned Modifier, const CallExpr *E,
|
2015-09-08 16:05:57 +08:00
|
|
|
SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
|
2014-01-30 22:48:01 +08:00
|
|
|
// Get the last argument, which specifies the vector type.
|
2014-02-21 19:57:24 +08:00
|
|
|
llvm::APSInt NeonTypeConst;
|
2014-01-30 22:48:01 +08:00
|
|
|
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
|
2014-02-21 19:57:24 +08:00
|
|
|
if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-01-30 22:48:01 +08:00
|
|
|
|
|
|
|
// Determine the type of this overloaded NEON intrinsic.
|
2014-02-21 19:57:24 +08:00
|
|
|
NeonTypeFlags Type(NeonTypeConst.getZExtValue());
|
2014-01-30 22:48:01 +08:00
|
|
|
bool Usgn = Type.isUnsigned();
|
2014-01-31 18:46:41 +08:00
|
|
|
bool Quad = Type.isQuad();
|
2014-01-30 22:48:01 +08:00
|
|
|
|
|
|
|
llvm::VectorType *VTy = GetNeonType(this, Type);
|
|
|
|
llvm::Type *Ty = VTy;
|
|
|
|
if (!Ty)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-01-30 22:48:01 +08:00
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
auto getAlignmentValue32 = [&](Address addr) -> Value* {
|
|
|
|
return Builder.getInt32(addr.getAlignment().getQuantity());
|
|
|
|
};
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
unsigned Int = LLVMIntrinsic;
|
|
|
|
if ((Modifier & UnsignedAlts) && !Usgn)
|
|
|
|
Int = AltLLVMIntrinsic;
|
|
|
|
|
2014-01-30 22:47:57 +08:00
|
|
|
switch (BuiltinID) {
|
|
|
|
default: break;
|
2014-02-13 18:44:17 +08:00
|
|
|
case NEON::BI__builtin_neon_vabs_v:
|
|
|
|
case NEON::BI__builtin_neon_vabsq_v:
|
|
|
|
if (VTy->getElementType()->isFloatingPointTy())
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
|
2014-02-21 19:57:24 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
|
2014-01-31 18:46:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vaddhn_v: {
|
|
|
|
llvm::VectorType *SrcTy =
|
|
|
|
llvm::VectorType::getExtendedElementVectorType(VTy);
|
|
|
|
|
|
|
|
// %sum = add <4 x i32> %lhs, %rhs
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
|
|
|
|
Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
|
|
|
|
|
|
|
|
// %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
|
2015-07-28 23:40:11 +08:00
|
|
|
Constant *ShiftAmt =
|
|
|
|
ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
|
2014-01-31 18:46:36 +08:00
|
|
|
Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
|
|
|
|
|
|
|
|
// %res = trunc <4 x i32> %high to <4 x i16>
|
|
|
|
return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
|
|
|
|
}
|
2014-02-04 22:55:52 +08:00
|
|
|
case NEON::BI__builtin_neon_vcale_v:
|
|
|
|
case NEON::BI__builtin_neon_vcaleq_v:
|
|
|
|
case NEON::BI__builtin_neon_vcalt_v:
|
|
|
|
case NEON::BI__builtin_neon_vcaltq_v:
|
|
|
|
std::swap(Ops[0], Ops[1]);
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vcage_v:
|
|
|
|
case NEON::BI__builtin_neon_vcageq_v:
|
2014-02-04 22:55:52 +08:00
|
|
|
case NEON::BI__builtin_neon_vcagt_v:
|
|
|
|
case NEON::BI__builtin_neon_vcagtq_v: {
|
|
|
|
llvm::Type *VecFlt = llvm::VectorType::get(
|
|
|
|
VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
|
|
|
|
VTy->getNumElements());
|
|
|
|
llvm::Type *Tys[] = { VTy, VecFlt };
|
2014-02-21 19:57:24 +08:00
|
|
|
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
|
|
|
|
return EmitNeonCall(F, Ops, NameHint);
|
2014-01-31 18:46:49 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vclz_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vclzq_v:
|
|
|
|
// We generate a target-independent intrinsic, which needs a second argument
|
2014-01-31 18:46:49 +08:00
|
|
|
// saying whether or not CLZ of zero is undefined; on ARM it isn't.
|
|
|
|
Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
|
2014-02-21 19:57:24 +08:00
|
|
|
break;
|
2014-01-31 18:46:52 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_f32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_f32_v:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
|
|
|
|
return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
|
|
|
|
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
|
2014-01-31 18:46:41 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_n_f32_v:
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_n_f64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_f32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
|
2014-02-21 19:57:24 +08:00
|
|
|
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
|
2014-01-31 18:46:41 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvt_n_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_n_u32_v:
|
2014-02-11 19:27:44 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_n_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_n_u64_v:
|
2014-01-31 18:46:41 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_s32_v:
|
2014-02-11 19:27:44 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-02-21 19:57:24 +08:00
|
|
|
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
|
2014-01-31 18:46:41 +08:00
|
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
|
|
}
|
2014-01-31 18:46:49 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_u32_v:
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvt_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_u64_v:
|
2014-01-31 18:46:49 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtq_s32_v:
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_u64_v: {
|
2015-08-25 07:41:31 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
|
2014-01-31 18:46:52 +08:00
|
|
|
return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
|
2014-01-31 18:46:49 +08:00
|
|
|
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
|
|
|
|
}
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvta_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_u32_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtaq_u64_v:
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtn_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_u32_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtnq_u64_v:
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtp_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_u32_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtpq_u64_v:
|
2014-02-19 18:37:13 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvtm_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-02-21 19:57:24 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
|
2014-02-19 18:37:13 +08:00
|
|
|
}
|
2014-01-30 22:48:01 +08:00
|
|
|
case NEON::BI__builtin_neon_vext_v:
|
|
|
|
case NEON::BI__builtin_neon_vextq_v: {
|
|
|
|
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
|
|
|
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Value *SV = llvm::ConstantVector::get(Indices);
|
|
|
|
return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
|
|
|
|
}
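// Illustrative example: for vextq_s32 with a lane immediate of 1 the indices
// are {1, 2, 3, 4}, i.e.
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <4 x i32> <i32 1, i32 2, i32 3, i32 4>
// which is the concatenation of the two inputs shifted down by one element.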
|
|
|
|
case NEON::BI__builtin_neon_vfma_v:
|
|
|
|
case NEON::BI__builtin_neon_vfmaq_v: {
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
|
|
|
|
|
|
|
// The NEON intrinsic puts the accumulator first, unlike LLVM's fma.
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
|
2014-01-30 22:48:01 +08:00
|
|
|
}
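// e.g. (illustrative) for vfmaq_f32 this becomes
//   call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c,
//                                    <4 x float> %a)
// with the accumulator rotated into the last position.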
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1_v:
|
2015-09-30 18:56:56 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1q_v: {
|
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
2015-09-08 16:05:57 +08:00
|
|
|
Ops.push_back(getAlignmentValue32(PtrOp0));
|
2015-09-30 18:56:56 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
|
|
|
|
}
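// e.g. (sketch) a vld1q_s32 load is emitted as a call to the arm.neon.vld1
// intrinsic overloaded on both the result type and the i8* pointer type,
// roughly:
//   %v = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %p, i32 <align>)
// where <align> comes from PtrOp0's recorded alignment.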
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2q_v:
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3q_v:
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld4_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_v: {
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
|
2015-09-08 16:05:57 +08:00
|
|
|
Value *Align = getAlignmentValue32(PtrOp1);
|
2015-05-19 06:14:03 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
|
2014-01-31 18:46:45 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-01-31 18:46:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld1_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_dup_v: {
|
|
|
|
Value *V = UndefValue::get(Ty);
|
|
|
|
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
|
2015-09-08 16:05:57 +08:00
|
|
|
PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
|
|
|
|
LoadInst *Ld = Builder.CreateLoad(PtrOp0);
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
|
2014-01-31 18:46:45 +08:00
|
|
|
Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
|
|
|
|
return EmitNeonSplat(Ops[0], CI);
|
|
|
|
}
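// Roughly (illustrative), for vld1q_dup_s32 on ARM this produces
//   %e = load i32, i32* %p
//   %v = insertelement <4 x i32> undef, i32 %e, i32 0
// followed by an EmitNeonSplat shuffle that broadcasts lane 0 to every lane.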
|
|
|
|
case NEON::BI__builtin_neon_vld2_lane_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2q_lane_v:
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3_lane_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3q_lane_v:
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld4_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_lane_v: {
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
|
2014-02-21 19:57:24 +08:00
|
|
|
for (unsigned I = 2; I < Ops.size() - 1; ++I)
|
|
|
|
Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
Ops.push_back(getAlignmentValue32(PtrOp1));
|
2014-02-21 19:57:24 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
|
2014-01-31 18:46:45 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-01-31 18:46:45 +08:00
|
|
|
}
|
2014-01-31 18:46:41 +08:00
|
|
|
case NEON::BI__builtin_neon_vmovl_v: {
|
|
|
|
llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
|
|
|
|
if (Usgn)
|
|
|
|
return Builder.CreateZExt(Ops[0], Ty, "vmovl");
|
|
|
|
return Builder.CreateSExt(Ops[0], Ty, "vmovl");
|
|
|
|
}
|
2014-01-31 18:46:49 +08:00
|
|
|
case NEON::BI__builtin_neon_vmovn_v: {
|
|
|
|
llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
|
|
|
|
return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
|
|
|
|
}
|
2014-01-31 18:46:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vmull_v:
|
|
|
|
// FIXME: the integer vmull operations could be emitted in terms of pure
|
|
|
|
// LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
|
|
|
|
// hoisting the exts outside loops. Until global ISel comes along that can
|
|
|
|
// see through such movement, this leads to bad CodeGen. So we need an
|
|
|
|
// intrinsic for now.
|
|
|
|
Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
|
|
|
|
Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
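// Illustrative sketch (not what we emit here): the pure-IR lowering the
// FIXME above refers to would look roughly like this for a signed
// 16x16->32 widening multiply with <4 x i16> inputs:
//   %a.ext = sext <4 x i16> %a to <4 x i32>
//   %b.ext = sext <4 x i16> %b to <4 x i32>
//   %prod  = mul <4 x i32> %a.ext, %b.ext
// Instead we call llvm.arm.neon.vmulls/vmullu/vmullp so the backend keeps
// the widening multiply as a single instruction.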
|
2014-01-31 18:46:49 +08:00
|
|
|
case NEON::BI__builtin_neon_vpadal_v:
|
|
|
|
case NEON::BI__builtin_neon_vpadalq_v: {
|
|
|
|
// The source operand type has twice as many elements of half the size.
|
|
|
|
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
|
|
|
|
llvm::Type *EltTy =
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
|
|
|
|
llvm::Type *NarrowTy =
|
|
|
|
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
llvm::Type *Tys[2] = { Ty, NarrowTy };
|
2014-02-21 19:57:24 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
|
2014-01-31 18:46:49 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpaddl_v:
|
|
|
|
case NEON::BI__builtin_neon_vpaddlq_v: {
|
|
|
|
// The source operand type has twice as many elements of half the size.
|
|
|
|
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
|
|
|
|
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
|
|
|
|
llvm::Type *NarrowTy =
|
|
|
|
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
llvm::Type *Tys[2] = { Ty, NarrowTy };
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
|
|
|
|
}
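// Worked example (illustrative only): vpaddl_s8 adds adjacent pairs of an
// <8 x i8> into a <4 x i16>, so here Ty is <4 x i16>, NarrowTy is <8 x i8>,
// and the overloaded intrinsic resolves to something like
// llvm.arm.neon.vpaddls.v4i16.v8i8. vpadal above follows the same pattern
// with an extra accumulator operand.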
|
2014-02-21 19:57:24 +08:00
|
|
|
case NEON::BI__builtin_neon_vqdmlal_v:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlsl_v: {
|
2014-01-31 18:46:36 +08:00
|
|
|
SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[1] =
|
|
|
|
EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
|
|
|
|
Ops.resize(2);
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
|
2014-01-31 18:46:36 +08:00
|
|
|
}
|
2014-01-31 18:46:41 +08:00
|
|
|
case NEON::BI__builtin_neon_vqshl_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vqshlq_n_v:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
|
|
|
|
1, false);
|
2014-07-29 17:25:17 +08:00
|
|
|
case NEON::BI__builtin_neon_vqshlu_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vqshluq_n_v:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
|
|
|
|
1, false);
|
2014-01-31 18:46:52 +08:00
|
|
|
case NEON::BI__builtin_neon_vrecpe_v:
|
|
|
|
case NEON::BI__builtin_neon_vrecpeq_v:
|
|
|
|
case NEON::BI__builtin_neon_vrsqrte_v:
|
|
|
|
case NEON::BI__builtin_neon_vrsqrteq_v:
|
2014-02-21 19:57:24 +08:00
|
|
|
Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
|
|
|
|
|
2014-07-29 17:25:17 +08:00
|
|
|
case NEON::BI__builtin_neon_vrshr_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vrshrq_n_v:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
|
|
|
|
1, true);
|
2014-01-31 18:46:41 +08:00
|
|
|
case NEON::BI__builtin_neon_vshl_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vshlq_n_v:
|
|
|
|
Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
|
|
|
|
return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
|
|
|
|
"vshl_n");
|
2014-02-11 00:20:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vshll_n_v: {
|
|
|
|
llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
|
|
|
|
if (Usgn)
|
|
|
|
Ops[0] = Builder.CreateZExt(Ops[0], VTy);
|
|
|
|
else
|
|
|
|
Ops[0] = Builder.CreateSExt(Ops[0], VTy);
|
|
|
|
Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
|
|
|
|
return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
|
|
|
|
}
|
2014-02-10 22:04:12 +08:00
|
|
|
case NEON::BI__builtin_neon_vshrn_n_v: {
|
|
|
|
llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
|
|
|
|
Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
|
|
|
|
if (Usgn)
|
|
|
|
Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
|
|
|
|
else
|
|
|
|
Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
|
|
|
|
return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
|
|
|
|
}
|
2014-01-31 18:46:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vshr_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vshrq_n_v:
|
|
|
|
return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
|
2014-01-31 18:46:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vst1_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4_lane_v:
|
2015-09-30 18:56:56 +08:00
|
|
|
case NEON::BI__builtin_neon_vst4q_lane_v: {
|
|
|
|
llvm::Type *Tys[] = {Int8PtrTy, Ty};
|
2015-09-08 16:05:57 +08:00
|
|
|
Ops.push_back(getAlignmentValue32(PtrOp0));
|
2015-09-30 18:56:56 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
|
|
|
|
}
|
2014-01-31 18:46:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vsubhn_v: {
|
|
|
|
llvm::VectorType *SrcTy =
|
|
|
|
llvm::VectorType::getExtendedElementVectorType(VTy);
|
|
|
|
|
|
|
|
// %diff = sub <4 x i32> %lhs, %rhs
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
|
|
|
|
Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
|
|
|
|
|
|
|
|
// %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
|
2015-07-28 23:40:11 +08:00
|
|
|
Constant *ShiftAmt =
|
|
|
|
ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
|
2014-01-31 18:46:36 +08:00
|
|
|
Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
|
|
|
|
|
|
|
|
// %res = trunc <4 x i32> %high to <4 x i16>
|
|
|
|
return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
|
|
|
|
}
|
2014-01-30 22:47:57 +08:00
|
|
|
case NEON::BI__builtin_neon_vtrn_v:
|
|
|
|
case NEON::BI__builtin_neon_vtrnq_v: {
|
2014-01-30 22:48:01 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-01-30 22:47:57 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
|
|
|
|
Indices.push_back(Builder.getInt32(i+vi));
|
|
|
|
Indices.push_back(Builder.getInt32(i+e+vi));
|
|
|
|
}
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-01-30 22:47:57 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-01-30 22:47:57 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
2014-01-31 18:46:36 +08:00
|
|
|
case NEON::BI__builtin_neon_vtst_v:
|
|
|
|
case NEON::BI__builtin_neon_vtstq_v: {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
|
|
|
|
Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
|
|
|
|
ConstantAggregateZero::get(Ty));
|
|
|
|
return Builder.CreateSExt(Ops[0], Ty, "vtst");
|
|
|
|
}
|
2014-01-30 22:47:57 +08:00
|
|
|
case NEON::BI__builtin_neon_vuzp_v:
|
|
|
|
case NEON::BI__builtin_neon_vuzpq_v: {
|
2014-01-30 22:48:01 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-01-30 22:47:57 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
|
|
|
|
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-01-30 22:47:57 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-01-30 22:47:57 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vzip_v:
|
|
|
|
case NEON::BI__builtin_neon_vzipq_v: {
|
2014-01-30 22:48:01 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-01-30 22:47:57 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
|
|
|
|
}
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-01-30 22:47:57 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-01-30 22:47:57 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
assert(Int && "Expected valid intrinsic number");
|
2013-09-24 10:48:06 +08:00
|
|
|
|
2013-12-11 01:44:36 +08:00
|
|
|
// Determine the type(s) of this overloaded AArch64 intrinsic.
|
2014-02-21 19:57:24 +08:00
|
|
|
Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
|
2013-09-24 10:48:06 +08:00
|
|
|
|
2014-02-21 19:57:24 +08:00
|
|
|
Value *Result = EmitNeonCall(F, Ops, NameHint);
|
|
|
|
llvm::Type *ResultType = ConvertType(E->getType());
|
2013-09-24 10:48:06 +08:00
|
|
|
// AArch64 intrinsics with a one-element vector result are cast back to the
|
|
|
|
// scalar type expected by the builtin.
|
2014-02-21 19:57:24 +08:00
|
|
|
return Builder.CreateBitCast(Result, ResultType, NameHint);
|
2013-09-24 10:48:06 +08:00
|
|
|
}
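// Note on the final bitcast above (a sketch, not a specific builtin): when
// an AArch64 scalar builtin's intrinsic result comes back as a one-element
// vector such as <1 x i64>, the bitcast simply reinterprets it as the i64
// the builtin's C prototype expects.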
|
|
|
|
|
2013-11-14 10:45:18 +08:00
|
|
|
Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
|
|
|
|
Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
|
|
|
|
const CmpInst::Predicate Ip, const Twine &Name) {
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *OTy = Op->getType();
|
|
|
|
|
|
|
|
// FIXME: this is utterly horrific. We should not be looking at previous
|
|
|
|
// codegen context to find out what needs doing. Unfortunately TableGen
|
|
|
|
// currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
|
|
|
|
// (etc).
|
|
|
|
if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
|
|
|
|
OTy = BI->getOperand(0)->getType();
|
|
|
|
|
2013-11-14 10:45:18 +08:00
|
|
|
Op = Builder.CreateBitCast(Op, OTy);
|
2014-03-29 23:09:45 +08:00
|
|
|
if (OTy->getScalarType()->isFloatingPointTy()) {
|
|
|
|
Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
|
2013-11-14 10:45:18 +08:00
|
|
|
} else {
|
2014-03-29 23:09:45 +08:00
|
|
|
Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
|
2013-11-14 10:45:18 +08:00
|
|
|
}
|
2013-12-23 10:44:00 +08:00
|
|
|
return Builder.CreateSExt(Op, Ty, Name);
|
2013-11-14 10:45:18 +08:00
|
|
|
}
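// Illustrative example (assumed operand shapes): for vceqz_f32 the caller
// passes Fp = FCMP_OEQ and Ip = ICMP_EQ, and with a <2 x float> operand we
// emit roughly
//   %cmp = fcmp oeq <2 x float> %op, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>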
|
|
|
|
|
2013-11-14 09:57:55 +08:00
|
|
|
static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
|
|
|
|
Value *ExtOp, Value *IndexOp,
|
|
|
|
llvm::Type *ResTy, unsigned IntID,
|
|
|
|
const char *Name) {
|
|
|
|
SmallVector<Value *, 2> TblOps;
|
|
|
|
if (ExtOp)
|
|
|
|
TblOps.push_back(ExtOp);
|
|
|
|
|
|
|
|
// Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
|
|
|
|
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
|
|
|
|
Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
|
|
|
|
Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
|
|
|
|
}
|
|
|
|
Value *SV = llvm::ConstantVector::get(Indices);
|
|
|
|
|
|
|
|
int PairPos = 0, End = Ops.size() - 1;
|
|
|
|
while (PairPos < End) {
|
|
|
|
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
|
|
|
|
Ops[PairPos+1], SV, Name));
|
|
|
|
PairPos += 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there's an odd number of 64-bit lookup tables, fill the high 64 bits
|
|
|
|
// of the 128-bit lookup table with zero.
|
|
|
|
if (PairPos == End) {
|
|
|
|
Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
|
|
|
|
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
|
|
|
|
ZeroTbl, SV, Name));
|
|
|
|
}
|
|
|
|
|
|
|
|
Function *TblF;
|
|
|
|
TblOps.push_back(IndexOp);
|
2014-02-26 19:55:15 +08:00
|
|
|
TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
|
2013-11-14 09:57:55 +08:00
|
|
|
|
|
|
|
return CGF.EmitNeonCall(TblF, TblOps, Name);
|
|
|
|
}
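// Worked example (sketch): for a two-register table lookup with <8 x i8>
// tables, the index vector built above is <0, 1, ..., 15>, so the
// shufflevector in the loop concatenates each register pair into a single
// <16 x i8> table before the final TBL/TBX intrinsic call selected by IntID.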
|
|
|
|
|
2014-12-04 12:52:37 +08:00
|
|
|
Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
|
2015-07-28 23:40:11 +08:00
|
|
|
unsigned Value;
|
2014-05-04 10:52:25 +08:00
|
|
|
switch (BuiltinID) {
|
2014-12-04 12:52:37 +08:00
|
|
|
default:
|
|
|
|
return nullptr;
|
2014-07-14 23:20:09 +08:00
|
|
|
case ARM::BI__builtin_arm_nop:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 0;
|
|
|
|
break;
|
2014-07-03 10:43:20 +08:00
|
|
|
case ARM::BI__builtin_arm_yield:
|
2014-05-04 10:52:25 +08:00
|
|
|
case ARM::BI__yield:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 1;
|
|
|
|
break;
|
2014-07-03 10:43:20 +08:00
|
|
|
case ARM::BI__builtin_arm_wfe:
|
2014-05-04 10:52:25 +08:00
|
|
|
case ARM::BI__wfe:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 2;
|
|
|
|
break;
|
2014-07-03 10:43:20 +08:00
|
|
|
case ARM::BI__builtin_arm_wfi:
|
2014-05-04 10:52:25 +08:00
|
|
|
case ARM::BI__wfi:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 3;
|
|
|
|
break;
|
2014-07-03 10:43:20 +08:00
|
|
|
case ARM::BI__builtin_arm_sev:
|
2014-05-04 10:52:25 +08:00
|
|
|
case ARM::BI__sev:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 4;
|
|
|
|
break;
|
2014-07-03 10:43:20 +08:00
|
|
|
case ARM::BI__builtin_arm_sevl:
|
2014-05-04 10:52:25 +08:00
|
|
|
case ARM::BI__sevl:
|
2015-07-28 23:40:11 +08:00
|
|
|
Value = 5;
|
|
|
|
break;
|
2014-04-26 05:13:29 +08:00
|
|
|
}
|
2015-07-28 23:40:11 +08:00
|
|
|
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
|
|
|
|
llvm::ConstantInt::get(Int32Ty, Value));
|
2014-12-04 12:52:37 +08:00
|
|
|
}
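// For instance, __builtin_arm_wfe() (or the MSVC-style __wfe()) lowers to
//   call void @llvm.arm.hint(i32 2)
// and __builtin_arm_nop() to @llvm.arm.hint(i32 0); this just illustrates
// the mapping encoded by the switch above.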
|
2014-04-26 05:13:29 +08:00
|
|
|
|
2015-06-16 01:51:01 +08:00
|
|
|
// Generates the IR for the read/write special register builtin.
|
|
|
|
// ValueType is the type of the value that is to be written or read,
|
|
|
|
// RegisterType is the type of the register being written to or read from.
|
|
|
|
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
|
|
|
|
const CallExpr *E,
|
|
|
|
llvm::Type *RegisterType,
|
|
|
|
llvm::Type *ValueType, bool IsRead) {
|
|
|
|
// Write and read register intrinsics only support 32- and 64-bit operations.
|
|
|
|
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
|
|
|
|
&& "Unsupported size for register.");
|
|
|
|
|
|
|
|
CodeGen::CGBuilderTy &Builder = CGF.Builder;
|
|
|
|
CodeGen::CodeGenModule &CGM = CGF.CGM;
|
|
|
|
LLVMContext &Context = CGM.getLLVMContext();
|
|
|
|
|
|
|
|
const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
|
|
|
|
StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
|
|
|
|
|
|
|
|
llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
|
|
|
|
llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
|
|
|
|
llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
|
|
|
|
|
|
|
|
llvm::Type *Types[] = { RegisterType };
|
|
|
|
|
|
|
|
bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
|
|
|
|
assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
|
|
|
|
&& "Can't fit 64-bit value in 32-bit register");
|
|
|
|
|
|
|
|
if (IsRead) {
|
|
|
|
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
|
|
|
|
llvm::Value *Call = Builder.CreateCall(F, Metadata);
|
|
|
|
|
|
|
|
if (MixedTypes)
|
|
|
|
// Read into 64 bit register and then truncate result to 32 bit.
|
|
|
|
return Builder.CreateTrunc(Call, ValueType);
|
|
|
|
|
|
|
|
if (ValueType->isPointerTy())
|
|
|
|
// Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
|
|
|
|
return Builder.CreateIntToPtr(Call, ValueType);
|
|
|
|
|
|
|
|
return Call;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
|
|
|
|
llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
|
|
|
|
if (MixedTypes) {
|
|
|
|
// Extend 32 bit write value to 64 bit to pass to write.
|
|
|
|
ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
|
|
|
|
return Builder.CreateCall(F, { Metadata, ArgValue });
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ValueType->isPointerTy()) {
|
|
|
|
// Have a VoidPtrTy ArgValue but need an i32/i64 to pass to the write.
|
|
|
|
ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
|
|
|
|
return Builder.CreateCall(F, { Metadata, ArgValue });
|
|
|
|
}
|
|
|
|
|
|
|
|
return Builder.CreateCall(F, { Metadata, ArgValue });
|
|
|
|
}
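// A hedged usage sketch (register name is hypothetical): __builtin_arm_rsr("cpsr")
// reaches this helper with RegisterType == ValueType == i32 and IsRead == true,
// producing roughly
//   %0 = call i32 @llvm.read_register.i32(metadata !{!"cpsr"})
// while the *_rsrp/*_wsrp variants add the pointer<->integer conversions
// handled above.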
|
|
|
|
|
2015-06-24 14:05:20 +08:00
|
|
|
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
|
|
|
|
/// argument that specifies the vector type.
|
|
|
|
static bool HasExtraNeonArgument(unsigned BuiltinID) {
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: break;
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vsha1h_u32:
|
|
|
|
case NEON::BI__builtin_neon_vsha1cq_u32:
|
|
|
|
case NEON::BI__builtin_neon_vsha1pq_u32:
|
|
|
|
case NEON::BI__builtin_neon_vsha1mq_u32:
|
|
|
|
case ARM::BI_MoveToCoprocessor:
|
|
|
|
case ARM::BI_MoveToCoprocessor2:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-12-04 12:52:37 +08:00
|
|
|
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
|
|
const CallExpr *E) {
|
|
|
|
if (auto Hint = GetValueForARMHint(BuiltinID))
|
|
|
|
return Hint;
|
2014-05-02 14:53:57 +08:00
|
|
|
|
2014-12-18 01:52:30 +08:00
|
|
|
if (BuiltinID == ARM::BI__emit) {
|
|
|
|
bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
|
|
|
|
llvm::FunctionType *FTy =
|
|
|
|
llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
|
|
|
|
|
|
|
|
APSInt Value;
|
|
|
|
if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
|
|
|
|
llvm_unreachable("Sema will ensure that the parameter is constant");
|
|
|
|
|
|
|
|
uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
|
|
|
|
|
|
|
|
llvm::InlineAsm *Emit =
|
|
|
|
IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
|
|
|
|
/*SideEffects=*/true)
|
|
|
|
: InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
|
|
|
|
/*SideEffects=*/true);
|
|
|
|
|
2015-07-15 01:27:39 +08:00
|
|
|
return Builder.CreateCall(Emit);
|
2014-12-18 01:52:30 +08:00
|
|
|
}
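// Example (assuming Thumb mode): __emit(0xbf00) becomes the side-effecting
// inline asm ".inst.n 0xBF00", i.e. the raw 16-bit NOP encoding, with no
// operands or constraints.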
|
|
|
|
|
2014-08-26 20:48:06 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_dbg) {
|
|
|
|
Value *Option = EmitScalarExpr(E->getArg(0));
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:18:14 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
|
|
|
|
Value *Address = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *RW = EmitScalarExpr(E->getArg(1));
|
|
|
|
Value *IsData = EmitScalarExpr(E->getArg(2));
|
|
|
|
|
|
|
|
// Locality is not supported on the ARM target.
|
|
|
|
Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
|
|
|
|
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
|
2014-08-14 03:18:14 +08:00
|
|
|
}
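// Sketch: __builtin_arm_prefetch(ptr, /*rw=*/0, /*data=*/1) ends up roughly as
//   call void @llvm.prefetch(i8* %ptr, i32 0, i32 3, i32 1)
// with the locality operand pinned to 3 as noted above.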
|
|
|
|
|
2014-06-17 05:55:58 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_rbit) {
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
|
|
|
|
EmitScalarExpr(E->getArg(0)),
|
|
|
|
"rbit");
|
|
|
|
}
|
|
|
|
|
2010-06-09 11:48:40 +08:00
|
|
|
if (BuiltinID == ARM::BI__clear_cache) {
|
2013-05-14 20:45:47 +08:00
|
|
|
assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
|
2010-06-08 01:26:50 +08:00
|
|
|
const FunctionDecl *FD = E->getDirectCallee();
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *Ops[2];
|
2013-05-14 20:45:47 +08:00
|
|
|
for (unsigned i = 0; i < 2; i++)
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[i] = EmitScalarExpr(E->getArg(i));
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
|
|
|
|
llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
|
2011-07-23 18:55:15 +08:00
|
|
|
StringRef Name = FD->getName();
|
2013-03-01 03:01:20 +08:00
|
|
|
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
|
2010-03-04 03:03:45 +08:00
|
|
|
}
|
2010-06-09 11:48:40 +08:00
|
|
|
|
2013-07-16 17:47:53 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
|
2014-07-02 20:56:02 +08:00
|
|
|
((BuiltinID == ARM::BI__builtin_arm_ldrex ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_ldaex) &&
|
2014-07-06 04:10:05 +08:00
|
|
|
getContext().getTypeSize(E->getType()) == 64) ||
|
|
|
|
BuiltinID == ARM::BI__ldrexd) {
|
|
|
|
Function *F;
|
|
|
|
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("unexpected builtin");
|
|
|
|
case ARM::BI__builtin_arm_ldaex:
|
|
|
|
F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
|
|
|
|
break;
|
|
|
|
case ARM::BI__builtin_arm_ldrexd:
|
|
|
|
case ARM::BI__builtin_arm_ldrex:
|
|
|
|
case ARM::BI__ldrexd:
|
|
|
|
F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
|
|
|
|
break;
|
|
|
|
}
|
2011-05-28 12:11:33 +08:00
|
|
|
|
|
|
|
Value *LdPtr = EmitScalarExpr(E->getArg(0));
|
2013-07-16 17:47:53 +08:00
|
|
|
Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
|
|
|
|
"ldrexd");
|
2011-05-28 12:11:33 +08:00
|
|
|
|
|
|
|
Value *Val0 = Builder.CreateExtractValue(Val, 1);
|
|
|
|
Value *Val1 = Builder.CreateExtractValue(Val, 0);
|
|
|
|
Val0 = Builder.CreateZExt(Val0, Int64Ty);
|
|
|
|
Val1 = Builder.CreateZExt(Val1, Int64Ty);
|
|
|
|
|
|
|
|
Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
|
|
|
|
Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
|
2013-07-16 17:47:53 +08:00
|
|
|
Val = Builder.CreateOr(Val, Val1);
|
|
|
|
return Builder.CreateBitCast(Val, ConvertType(E->getType()));
|
2011-05-28 12:11:33 +08:00
|
|
|
}
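// In other words (sketch): the intrinsic returns two i32 halves, and the
// code above rebuilds the 64-bit result as
//   (zext(hi) << 32) | zext(lo)
// before bitcasting to the builtin's declared return type.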
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_ldaex) {
|
2013-07-16 17:47:53 +08:00
|
|
|
Value *LoadAddr = EmitScalarExpr(E->getArg(0));
|
|
|
|
|
|
|
|
QualType Ty = E->getType();
|
|
|
|
llvm::Type *RealResTy = ConvertType(Ty);
|
|
|
|
llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
|
|
|
|
getContext().getTypeSize(Ty));
|
|
|
|
LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
|
|
|
|
? Intrinsic::arm_ldaex
|
|
|
|
: Intrinsic::arm_ldrex,
|
|
|
|
LoadAddr->getType());
|
2013-07-16 17:47:53 +08:00
|
|
|
Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
|
|
|
|
|
|
|
|
if (RealResTy->isPointerTy())
|
|
|
|
return Builder.CreateIntToPtr(Val, RealResTy);
|
|
|
|
else {
|
|
|
|
Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
|
|
|
|
return Builder.CreateBitCast(Val, RealResTy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_strexd ||
|
2014-07-02 20:56:02 +08:00
|
|
|
((BuiltinID == ARM::BI__builtin_arm_stlex ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_strex) &&
|
2013-07-16 17:47:53 +08:00
|
|
|
getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
|
|
|
|
? Intrinsic::arm_stlexd
|
|
|
|
: Intrinsic::arm_strexd);
|
2014-12-02 06:02:27 +08:00
|
|
|
llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
|
2011-05-28 12:11:33 +08:00
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
|
2011-05-28 12:11:33 +08:00
|
|
|
Value *Val = EmitScalarExpr(E->getArg(0));
|
|
|
|
Builder.CreateStore(Val, Tmp);
|
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
|
2011-05-28 12:11:33 +08:00
|
|
|
Val = Builder.CreateLoad(LdPtr);
|
|
|
|
|
|
|
|
Value *Arg0 = Builder.CreateExtractValue(Val, 0);
|
|
|
|
Value *Arg1 = Builder.CreateExtractValue(Val, 1);
|
2013-07-16 17:47:53 +08:00
|
|
|
Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
|
2011-05-28 12:11:33 +08:00
|
|
|
}
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_strex ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_stlex) {
|
2013-07-16 17:47:53 +08:00
|
|
|
Value *StoreVal = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *StoreAddr = EmitScalarExpr(E->getArg(1));
|
|
|
|
|
|
|
|
QualType Ty = E->getArg(0)->getType();
|
|
|
|
llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
|
|
|
|
getContext().getTypeSize(Ty));
|
|
|
|
StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
|
|
|
|
|
|
|
|
if (StoreVal->getType()->isPointerTy())
|
|
|
|
StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
|
|
|
|
else {
|
|
|
|
StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
|
|
|
|
StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
|
|
|
|
}
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
|
|
|
|
? Intrinsic::arm_stlex
|
|
|
|
: Intrinsic::arm_strex,
|
|
|
|
StoreAddr->getType());
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
|
2013-07-16 17:47:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_clrex) {
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
|
2015-07-15 01:27:39 +08:00
|
|
|
return Builder.CreateCall(F);
|
2013-07-16 17:47:53 +08:00
|
|
|
}
|
|
|
|
|
2013-09-18 18:07:09 +08:00
|
|
|
// CRC32
|
|
|
|
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
case ARM::BI__builtin_arm_crc32b:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32b; break;
|
|
|
|
case ARM::BI__builtin_arm_crc32cb:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
|
|
|
|
case ARM::BI__builtin_arm_crc32h:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32h; break;
|
|
|
|
case ARM::BI__builtin_arm_crc32ch:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
|
|
|
|
case ARM::BI__builtin_arm_crc32w:
|
|
|
|
case ARM::BI__builtin_arm_crc32d:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32w; break;
|
|
|
|
case ARM::BI__builtin_arm_crc32cw:
|
|
|
|
case ARM::BI__builtin_arm_crc32cd:
|
|
|
|
CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
|
|
|
|
Value *Arg0 = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *Arg1 = EmitScalarExpr(E->getArg(1));
|
|
|
|
|
|
|
|
// crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
|
|
|
|
// intrinsics, hence we need different codegen for these cases.
|
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_crc32cd) {
|
|
|
|
Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
|
|
|
|
Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
|
|
|
|
Value *Arg1b = Builder.CreateLShr(Arg1, C1);
|
|
|
|
Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
|
|
|
|
|
|
|
|
Function *F = CGM.getIntrinsic(CRCIntrinsicID);
|
2015-05-19 06:14:03 +08:00
|
|
|
Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
|
|
|
|
return Builder.CreateCall(F, {Res, Arg1b});
|
2013-09-18 18:07:09 +08:00
|
|
|
} else {
|
|
|
|
Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
|
|
|
|
|
|
|
|
Function *F = CGM.getIntrinsic(CRCIntrinsicID);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Arg0, Arg1});
|
2013-09-18 18:07:09 +08:00
|
|
|
}
|
|
|
|
}
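// Sketch of the 64-bit split described above: __builtin_arm_crc32d(acc, x)
// is emitted as two chained 32-bit steps,
//   crc32w(crc32w(acc, trunc(x)), trunc(x >> 32))
// and likewise for the crc32cd variant.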
|
|
|
|
|
2015-06-16 01:51:01 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_rsr ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_rsr64 ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_rsrp ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_wsr ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_wsr64 ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_wsrp) {
|
|
|
|
|
|
|
|
bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_rsr64 ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_rsrp;
|
|
|
|
|
|
|
|
bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_wsrp;
|
|
|
|
|
|
|
|
bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_wsr64;
|
|
|
|
|
|
|
|
llvm::Type *ValueType;
|
|
|
|
llvm::Type *RegisterType;
|
|
|
|
if (IsPointerBuiltin) {
|
|
|
|
ValueType = VoidPtrTy;
|
|
|
|
RegisterType = Int32Ty;
|
|
|
|
} else if (Is64Bit) {
|
|
|
|
ValueType = RegisterType = Int64Ty;
|
|
|
|
} else {
|
|
|
|
ValueType = RegisterType = Int32Ty;
|
|
|
|
}
|
|
|
|
|
|
|
|
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
|
|
|
|
}
|
|
|
|
|
2015-06-04 09:43:41 +08:00
|
|
|
// Find out if any arguments are required to be integer constant
|
|
|
|
// expressions.
|
|
|
|
unsigned ICEArguments = 0;
|
|
|
|
ASTContext::GetBuiltinTypeError Error;
|
|
|
|
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
|
|
|
|
assert(Error == ASTContext::GE_None && "Should not codegen an error");
|
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
auto getAlignmentValue32 = [&](Address addr) -> Value* {
|
|
|
|
return Builder.getInt32(addr.getAlignment().getQuantity());
|
|
|
|
};
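// This helper just materializes the Address's known alignment as an i32, which
// is what the llvm.arm.neon.vld*/vst* intrinsics expect as their trailing
// alignment operand (e.g. an align-8 source yields i32 8).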
|
|
|
|
|
|
|
|
Address PtrOp0 = Address::invalid();
|
|
|
|
Address PtrOp1 = Address::invalid();
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<Value*, 4> Ops;
|
2015-06-24 14:05:20 +08:00
|
|
|
bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
|
|
|
|
unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
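// Illustratively: for an overloaded builtin such as vld1q_v(ptr, type_code)
// the trailing type code is only a discriminator for codegen, so NumArgs drops
// it and it never becomes an operand of the emitted intrinsic call.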
|
|
|
|
for (unsigned i = 0, e = NumArgs; i != e; i++) {
|
2012-08-23 11:10:17 +08:00
|
|
|
if (i == 0) {
|
|
|
|
switch (BuiltinID) {
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4q_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4q_lane_v:
|
2012-08-23 11:10:17 +08:00
|
|
|
// Get the alignment for the argument in addition to the value;
|
|
|
|
// we'll use it later.
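// (EmitPointerWithAlignment gives us both the pointer and its known alignment;
// the alignment later becomes the i32 alignment operand of the NEON
// load/store intrinsic.)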
|
2015-09-08 16:05:57 +08:00
|
|
|
PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
|
|
|
|
Ops.push_back(PtrOp0.getPointer());
|
2012-08-23 11:10:17 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i == 1) {
|
|
|
|
switch (BuiltinID) {
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2q_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3q_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4_dup_v:
|
2012-08-23 11:10:17 +08:00
|
|
|
// Get the alignment for the argument in addition to the value;
|
|
|
|
// we'll use it later.
|
2015-09-08 16:05:57 +08:00
|
|
|
PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
|
|
|
|
Ops.push_back(PtrOp1.getPointer());
|
2012-08-23 11:10:17 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2015-06-04 09:43:41 +08:00
|
|
|
|
|
|
|
if ((ICEArguments & (1 << i)) == 0) {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
|
|
|
} else {
|
|
|
|
// If this is required to be a constant, constant fold it so that we know
|
|
|
|
// that the generated intrinsic gets a ConstantInt.
|
|
|
|
llvm::APSInt Result;
|
|
|
|
bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
|
|
|
|
assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
|
|
|
|
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
|
|
|
|
}
|
2012-08-23 11:10:17 +08:00
|
|
|
}
|
2010-06-09 11:48:40 +08:00
|
|
|
|
2011-08-13 13:03:46 +08:00
|
|
|
switch (BuiltinID) {
|
|
|
|
default: break;
|
2015-06-24 14:05:20 +08:00
|
|
|
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vget_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f32:
|
2015-06-24 14:05:20 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
|
|
|
|
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vset_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_f32:
|
2011-08-13 13:03:46 +08:00
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
2014-02-04 01:28:04 +08:00
|
|
|
|
|
|
|
case NEON::BI__builtin_neon_vsha1h_u32:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
|
|
|
|
"vsha1h");
|
|
|
|
case NEON::BI__builtin_neon_vsha1cq_u32:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
|
|
|
|
"vsha1h");
|
|
|
|
case NEON::BI__builtin_neon_vsha1pq_u32:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
|
|
|
|
"vsha1h");
|
|
|
|
case NEON::BI__builtin_neon_vsha1mq_u32:
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
|
|
|
|
"vsha1h");
|
2015-06-24 14:05:20 +08:00
|
|
|
|
|
|
|
// The ARM _MoveToCoprocessor builtins put the input register value as
|
|
|
|
// the first argument, but the LLVM intrinsic expects it as the third one.
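// Illustratively (operand names here are informal): _MoveToCoprocessor(v, cp,
// op1, CRn, CRm, op2) becomes llvm.arm.mcr(cp, op1, v, CRn, CRm, op2), which
// is the reordering performed below.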
|
|
|
|
case ARM::BI_MoveToCoprocessor:
|
|
|
|
case ARM::BI_MoveToCoprocessor2: {
|
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
|
|
|
|
Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
|
|
|
|
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
|
|
|
|
Ops[3], Ops[4], Ops[5]});
|
|
|
|
}
|
2011-08-13 13:03:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get the last argument, which specifies the vector type.
|
2015-06-24 14:05:20 +08:00
|
|
|
assert(HasExtraArg);
|
2010-06-09 11:48:40 +08:00
|
|
|
llvm::APSInt Result;
|
|
|
|
const Expr *Arg = E->getArg(E->getNumArgs()-1);
|
|
|
|
if (!Arg->isIntegerConstantExpr(Result, getContext()))
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2010-06-09 11:48:40 +08:00
|
|
|
|
2010-08-04 05:32:34 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
|
|
|
|
BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
|
|
|
|
// Determine the overloaded type of this builtin.
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::Type *Ty;
|
2010-08-04 05:32:34 +08:00
|
|
|
if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
|
2012-02-07 08:39:47 +08:00
|
|
|
Ty = FloatTy;
|
2010-08-04 05:32:34 +08:00
|
|
|
else
|
2012-02-07 08:39:47 +08:00
|
|
|
Ty = DoubleTy;
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2010-08-04 05:32:34 +08:00
|
|
|
// Determine whether this is an unsigned conversion.
|
|
|
|
bool usgn = Result.getZExtValue() == 1;
|
|
|
|
unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
|
|
|
|
|
|
|
|
// Call the appropriate intrinsic.
|
2011-07-15 01:45:50 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Int, Ty);
|
2011-07-15 16:37:34 +08:00
|
|
|
return Builder.CreateCall(F, Ops, "vcvtr");
|
2010-08-04 05:32:34 +08:00
|
|
|
}
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2010-08-04 05:32:34 +08:00
|
|
|
// Determine the type of this overloaded NEON intrinsic.
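// The constant folded above packs the element kind, signedness and quad-ness
// (NeonTypeFlags); GetNeonType expands it into the LLVM vector type, e.g. a
// quad 8-bit flag becomes <16 x i8>.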
|
2011-11-08 09:16:11 +08:00
|
|
|
NeonTypeFlags Type(Result.getZExtValue());
|
|
|
|
bool usgn = Type.isUnsigned();
|
2010-12-04 01:10:22 +08:00
|
|
|
bool rightShift = false;
|
2010-06-09 11:48:40 +08:00
|
|
|
|
2012-02-07 08:39:47 +08:00
|
|
|
llvm::VectorType *VTy = GetNeonType(this, Type);
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::Type *Ty = VTy;
|
2010-06-09 11:48:40 +08:00
|
|
|
if (!Ty)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2010-06-09 11:48:40 +08:00
|
|
|
|
2014-01-30 22:47:57 +08:00
|
|
|
// Many NEON builtins have identical semantics and uses in ARM and
|
|
|
|
// AArch64. Emit these in a single function.
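// findNeonIntrinsicInMap does a binary search over ARMSIMDIntrinsicMap (sorted
// by builtin ID, checked once via NEONSIMDIntrinsicsProvenSorted); a hit
// carries the LLVM intrinsic plus the name/type-modifier hints that
// EmitCommonNeonBuiltinExpr uses to emit the call.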
|
2014-08-27 14:28:36 +08:00
|
|
|
auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
|
2014-02-21 19:57:24 +08:00
|
|
|
const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
|
|
|
|
IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
|
|
|
|
if (Builtin)
|
|
|
|
return EmitCommonNeonBuiltinExpr(
|
|
|
|
Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
|
2015-09-08 16:05:57 +08:00
|
|
|
Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
|
2014-01-30 22:47:57 +08:00
|
|
|
|
2010-06-09 11:48:40 +08:00
|
|
|
unsigned Int;
|
|
|
|
switch (BuiltinID) {
|
2014-05-21 13:09:00 +08:00
|
|
|
default: return nullptr;
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1q_lane_v:
|
2012-08-15 01:27:04 +08:00
|
|
|
// Handle 64-bit integer elements as a special case. Use shuffles of
|
|
|
|
// one-element vectors to avoid poor code for i64 in the backend.
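// Sketch of the trick below: instead of an i64 element load + insertelement
// (which round-trips through core registers), keep the untouched lane by
// shuffling it into a <1 x i64>, load the new value as a <1 x i64> via
// llvm.arm.neon.vld1, and recombine the two one-element vectors with a final
// shufflevector in the right lane order.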
|
|
|
|
if (VTy->getElementType()->isIntegerTy(64)) {
|
|
|
|
// Extract the other lane.
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
2015-07-28 23:40:11 +08:00
|
|
|
uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
|
2012-08-15 01:27:04 +08:00
|
|
|
Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
|
|
|
|
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
|
|
|
|
// Load the value as a one-element vector.
|
|
|
|
Ty = llvm::VectorType::get(VTy->getElementType(), 1);
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
|
2015-09-08 16:05:57 +08:00
|
|
|
Value *Align = getAlignmentValue32(PtrOp0);
|
2015-05-19 06:14:03 +08:00
|
|
|
Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
|
2012-08-15 01:27:04 +08:00
|
|
|
// Combine them.
|
2015-07-28 23:40:11 +08:00
|
|
|
uint32_t Indices[] = {1 - Lane, Lane};
|
|
|
|
SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
|
2012-08-15 01:27:04 +08:00
|
|
|
return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
|
|
|
|
}
|
|
|
|
// fall through
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1_lane_v: {
|
2010-06-21 07:05:28 +08:00
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
2015-09-09 09:37:18 +08:00
|
|
|
PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
|
2015-09-08 16:05:57 +08:00
|
|
|
Value *Ld = Builder.CreateLoad(PtrOp0);
|
2012-02-05 07:58:08 +08:00
|
|
|
return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
|
|
|
|
}
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4_dup_v: {
|
2010-12-11 06:54:58 +08:00
|
|
|
// Handle 64-bit elements as a special case. There is no "dup" needed.
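// With 64-bit elements each returned vector holds a single element, so a plain
// vld2/vld3/vld4 of one element per register is already the "dup" result and
// the splat loop used on the general path is unnecessary.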
|
|
|
|
if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
|
|
|
|
switch (BuiltinID) {
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld2;
|
2010-12-11 06:54:58 +08:00
|
|
|
break;
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld3;
|
2010-12-11 06:54:58 +08:00
|
|
|
break;
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld4_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld4;
|
2010-12-11 06:54:58 +08:00
|
|
|
break;
|
2011-09-23 13:06:16 +08:00
|
|
|
default: llvm_unreachable("unknown vld_dup intrinsic?");
|
2010-12-11 06:54:58 +08:00
|
|
|
}
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
2015-09-08 16:05:57 +08:00
|
|
|
llvm::Value *Align = getAlignmentValue32(PtrOp1);
|
2015-05-19 06:14:03 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
|
2010-12-11 06:54:58 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2010-12-11 06:54:58 +08:00
|
|
|
}
|
2010-06-21 07:05:28 +08:00
|
|
|
switch (BuiltinID) {
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld2lane;
|
2010-06-21 07:05:28 +08:00
|
|
|
break;
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld3lane;
|
2010-06-21 07:05:28 +08:00
|
|
|
break;
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vld4_dup_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int = Intrinsic::arm_neon_vld4lane;
|
2010-06-21 07:05:28 +08:00
|
|
|
break;
|
2011-09-23 13:06:16 +08:00
|
|
|
default: llvm_unreachable("unknown vld_dup intrinsic?");
|
2010-06-21 07:05:28 +08:00
|
|
|
}
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2010-06-21 07:05:28 +08:00
|
|
|
SmallVector<Value*, 6> Args;
|
|
|
|
Args.push_back(Ops[1]);
|
|
|
|
Args.append(STy->getNumElements(), UndefValue::get(Ty));
|
|
|
|
|
2010-06-27 15:15:29 +08:00
|
|
|
llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
|
2010-06-21 07:05:28 +08:00
|
|
|
Args.push_back(CI);
|
2015-09-08 16:05:57 +08:00
|
|
|
Args.push_back(getAlignmentValue32(PtrOp1));
|
2012-09-21 08:18:27 +08:00
|
|
|
|
2011-07-15 16:37:34 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
|
2010-06-21 07:05:28 +08:00
|
|
|
// Splat lane 0 to all elements in each vector of the result.
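// Sketch: the vldN-lane call above loaded the data into lane 0 of otherwise
// undef vectors; EmitNeonSplat then broadcasts lane 0 across each member with
// a zero shuffle mask, roughly <v[0], v[0], ...> per returned vector.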
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
Value *Val = Builder.CreateExtractValue(Ops[1], i);
|
|
|
|
Value *Elt = Builder.CreateBitCast(Val, Ty);
|
|
|
|
Elt = EmitNeonSplat(Elt, CI);
|
|
|
|
Elt = Builder.CreateBitCast(Elt, Val->getType());
|
|
|
|
Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
|
|
|
|
}
|
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2010-06-21 07:05:28 +08:00
|
|
|
}
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vqrshrn_n_v:
|
2012-09-21 08:18:27 +08:00
|
|
|
Int =
|
|
|
|
usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
|
2010-06-14 13:21:25 +08:00
|
|
|
1, true);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vqrshrun_n_v:
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
|
2010-12-09 06:37:56 +08:00
|
|
|
Ops, "vqrshrun_n", 1, true);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vqshrn_n_v:
|
2010-06-14 13:21:25 +08:00
|
|
|
Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
|
2010-06-14 13:21:25 +08:00
|
|
|
1, true);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vqshrun_n_v:
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
|
2010-12-09 06:37:56 +08:00
|
|
|
Ops, "vqshrun_n", 1, true);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vrecpe_v:
|
|
|
|
case NEON::BI__builtin_neon_vrecpeq_v:
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
|
2010-06-12 06:57:12 +08:00
|
|
|
Ops, "vrecpe");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vrshrn_n_v:
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
|
2010-12-09 06:37:56 +08:00
|
|
|
Ops, "vrshrn_n", 1, true);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vrsra_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vrsraq_n_v:
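// There is no single rounding-shift-and-accumulate intrinsic used here: emit
// the rounding shift (llvm.arm.neon.vrshift[su] with a negated, i.e. right,
// shift amount from EmitNeonShiftVector) and then add it to the accumulator.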
|
2010-06-12 14:06:07 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
|
|
|
|
Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
|
2015-05-19 06:14:03 +08:00
|
|
|
Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
|
2010-06-12 14:06:07 +08:00
|
|
|
return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vsri_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsriq_n_v:
|
2010-12-04 01:10:22 +08:00
|
|
|
rightShift = true;
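// Deliberate fall-through into the vsli case: both map to
// llvm.arm.neon.vshiftins and differ only in the shift direction recorded in
// rightShift.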
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vsli_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsliq_n_v:
|
2010-12-04 01:10:22 +08:00
|
|
|
Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
|
2011-07-15 01:45:50 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
|
2010-06-12 06:57:12 +08:00
|
|
|
Ops, "vsli_n");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vsra_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsraq_n_v:
|
2010-06-12 06:57:12 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2013-10-04 21:13:15 +08:00
|
|
|
Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
|
2010-06-12 06:57:12 +08:00
|
|
|
return Builder.CreateAdd(Ops[0], Ops[1]);
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vst1q_lane_v:
|
2012-08-15 01:27:04 +08:00
|
|
|
// Handle 64-bit integer elements as a special case. Use a shuffle to get
|
|
|
|
// a one-element vector and avoid poor code for i64 in the backend.
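// Sketch of the store path below: shuffle the selected lane out into a
// <1 x i64>, then store it with llvm.arm.neon.vst1 using the pointer's known
// alignment, again avoiding an i64 extractelement through core registers.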
|
|
|
|
if (VTy->getElementType()->isIntegerTy(64)) {
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
|
|
|
|
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
|
2015-09-08 16:05:57 +08:00
|
|
|
Ops[2] = getAlignmentValue32(PtrOp0);
|
2015-09-30 18:56:56 +08:00
|
|
|
llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
|
2012-08-15 01:27:04 +08:00
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
|
2015-09-30 18:56:56 +08:00
|
|
|
Tys), Ops);
|
2012-08-15 01:27:04 +08:00
|
|
|
}
|
|
|
|
// fall through
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vst1_lane_v: {
|
2010-06-12 06:57:12 +08:00
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
|
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
2015-09-08 16:05:57 +08:00
|
|
|
auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
|
2012-02-05 07:58:08 +08:00
|
|
|
return St;
|
|
|
|
}
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbl1_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
|
|
|
|
Ops, "vtbl1");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbl2_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
|
|
|
|
Ops, "vtbl2");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbl3_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
|
|
|
|
Ops, "vtbl3");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbl4_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
|
|
|
|
Ops, "vtbl4");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbx1_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
|
|
|
|
Ops, "vtbx1");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbx2_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
|
|
|
|
Ops, "vtbx2");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbx3_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
|
|
|
|
Ops, "vtbx3");
|
2014-01-30 22:47:51 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbx4_v:
|
2010-06-09 09:10:23 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
|
|
|
|
Ops, "vtbx4");
|
2010-03-04 03:03:45 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
|
2014-03-29 23:09:45 +08:00
|
|
|
const CallExpr *E,
|
|
|
|
SmallVectorImpl<Value *> &Ops) {
|
|
|
|
unsigned int Int = 0;
|
2014-05-21 13:09:00 +08:00
|
|
|
const char *s = nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default:
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vtbl1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl1q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbl2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl2q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbl3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl3q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbl4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl4q_v:
|
|
|
|
break;
|
|
|
|
case NEON::BI__builtin_neon_vtbx1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx1q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbx2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx2q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbx3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx3q_v:
|
|
|
|
case NEON::BI__builtin_neon_vtbx4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx4q_v:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(E->getNumArgs() >= 3);
|
|
|
|
|
|
|
|
// Get the last argument, which specifies the vector type.
|
|
|
|
llvm::APSInt Result;
|
|
|
|
const Expr *Arg = E->getArg(E->getNumArgs() - 1);
|
|
|
|
if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
// Determine the type of this overloaded NEON intrinsic.
|
|
|
|
NeonTypeFlags Type(Result.getZExtValue());
|
2015-07-28 23:40:11 +08:00
|
|
|
llvm::VectorType *Ty = GetNeonType(&CGF, Type);
|
2014-03-29 23:09:45 +08:00
|
|
|
if (!Ty)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
CodeGen::CGBuilderTy &Builder = CGF.Builder;
|
|
|
|
|
|
|
|
// AArch64 scalar builtins are not overloaded; they do not have an extra
|
|
|
|
// argument that specifies the vector type, so we need to handle each case.
|
|
|
|
switch (BuiltinID) {
|
|
|
|
case NEON::BI__builtin_neon_vtbl1_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
|
|
|
|
Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
|
|
|
|
"vtbl1");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbl2_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
|
|
|
|
Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
|
|
|
|
"vtbl1");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbl3_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
|
|
|
|
Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
|
|
|
|
"vtbl2");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbl4_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
|
|
|
|
Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
|
|
|
|
"vtbl2");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbx1_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *TblRes =
|
|
|
|
packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
|
|
|
|
Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
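// vtbx1 is emulated: do a tbl1 lookup, then keep the original destination
// lane (Ops[0]) wherever the index is out of range for a single 64-bit table
// (>= 8), and the table result everywhere else.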
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
llvm::Constant *EightV = ConstantInt::get(Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
|
|
|
|
CmpRes = Builder.CreateSExt(CmpRes, Ty);
|
|
|
|
|
|
|
|
Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
|
|
|
|
Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
|
|
|
|
return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbx2_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
|
|
|
|
Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
|
|
|
|
"vtbx1");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbx3_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *TblRes =
|
|
|
|
packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
|
|
|
|
Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
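// vtbx3 is emulated the same way: the lookup itself is a tbl2 over the packed
// tables, and the destination lane is kept wherever the index is out of range
// for three 64-bit tables (>= 24).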
|
|
|
|
|
|
|
|
llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
|
|
|
|
TwentyFourV);
|
|
|
|
CmpRes = Builder.CreateSExt(CmpRes, Ty);
|
|
|
|
|
|
|
|
Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
|
|
|
|
Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
|
|
|
|
return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtbx4_v: {
|
2015-07-28 23:40:11 +08:00
|
|
|
return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
|
|
|
|
Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
|
|
|
|
"vtbx2");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbl1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl1q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbl2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl2q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbl3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl3q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbl4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbl4q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbx1_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx1q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbx2_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx2q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbx3_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx3q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vqtbx4_v:
|
|
|
|
case NEON::BI__builtin_neon_vqtbx4q_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Int)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
Function *F = CGF.CGM.getIntrinsic(Int, Ty);
|
|
|
|
return CGF.EmitNeonCall(F, Ops, s);
|
|
|
|
}
|
|
|
|
|
|
|
|
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
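// Widen a scalar i16 into lane 0 of a <4 x i16> vector so it can be fed to
// NEON intrinsics that only come in vector form.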
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
Op = Builder.CreateBitCast(Op, Int16Ty);
|
|
|
|
Value *V = UndefValue::get(VTy);
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
|
2014-03-29 23:09:45 +08:00
|
|
|
Op = Builder.CreateInsertElement(V, Op, CI);
|
|
|
|
return Op;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
const CallExpr *E) {
|
2014-07-13 07:27:22 +08:00
|
|
|
unsigned HintID = static_cast<unsigned>(-1);
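// The hint builtins (nop/yield/wfe/wfi/sev/sevl) all lower to the
// aarch64.hint intrinsic; HintID is the immediate operand it takes.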
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: break;
|
2014-07-14 23:20:09 +08:00
|
|
|
case AArch64::BI__builtin_arm_nop:
|
|
|
|
HintID = 0;
|
|
|
|
break;
|
2014-07-13 07:27:22 +08:00
|
|
|
case AArch64::BI__builtin_arm_yield:
|
|
|
|
HintID = 1;
|
|
|
|
break;
|
|
|
|
case AArch64::BI__builtin_arm_wfe:
|
|
|
|
HintID = 2;
|
|
|
|
break;
|
|
|
|
case AArch64::BI__builtin_arm_wfi:
|
|
|
|
HintID = 3;
|
|
|
|
break;
|
|
|
|
case AArch64::BI__builtin_arm_sev:
|
|
|
|
HintID = 4;
|
|
|
|
break;
|
|
|
|
case AArch64::BI__builtin_arm_sevl:
|
|
|
|
HintID = 5;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (HintID != static_cast<unsigned>(-1)) {
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
|
|
|
|
return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
|
|
|
|
}
|
|
|
|
|
2014-08-14 03:18:20 +08:00
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
|
|
|
|
Value *Address = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *RW = EmitScalarExpr(E->getArg(1));
|
|
|
|
Value *CacheLevel = EmitScalarExpr(E->getArg(2));
|
|
|
|
Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
|
|
|
|
Value *IsData = EmitScalarExpr(E->getArg(4));
|
|
|
|
|
|
|
|
Value *Locality = nullptr;
|
|
|
|
if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
|
|
|
|
// Temporal fetch; the cache level needs to be converted to llvm.prefetch's locality hint.
|
|
|
|
Locality = llvm::ConstantInt::get(Int32Ty,
|
|
|
|
-cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
|
|
|
|
} else {
|
|
|
|
// Streaming fetch.
|
|
|
|
Locality = llvm::ConstantInt::get(Int32Ty, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
|
|
|
|
// PLDL3STRM or PLDL2STRM.
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Address, RW, Locality, IsData});
|
2014-08-14 03:18:20 +08:00
|
|
|
}
|
|
|
|
|
2014-06-17 05:56:02 +08:00
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
|
|
|
|
assert((getContext().getTypeSize(E->getType()) == 32) &&
|
|
|
|
"rbit of unusual size!");
|
|
|
|
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
|
|
|
|
return Builder.CreateCall(
|
|
|
|
CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
|
|
|
|
}
|
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
|
|
|
|
assert((getContext().getTypeSize(E->getType()) == 64) &&
|
|
|
|
"rbit of unusual size!");
|
|
|
|
llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
|
|
|
|
return Builder.CreateCall(
|
|
|
|
CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
if (BuiltinID == AArch64::BI__clear_cache) {
|
2014-03-29 23:09:45 +08:00
|
|
|
assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
|
|
|
|
const FunctionDecl *FD = E->getDirectCallee();
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *Ops[2];
|
2014-03-29 23:09:45 +08:00
|
|
|
for (unsigned i = 0; i < 2; i++)
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[i] = EmitScalarExpr(E->getArg(i));
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
|
|
|
|
llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
|
|
|
|
StringRef Name = FD->getName();
|
|
|
|
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
|
|
|
|
}
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
|
2014-03-29 23:09:45 +08:00
|
|
|
getContext().getTypeSize(E->getType()) == 128) {
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
|
|
|
|
? Intrinsic::aarch64_ldaxp
|
|
|
|
: Intrinsic::aarch64_ldxp);
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
Value *LdPtr = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
|
|
|
|
"ldxp");
|
|
|
|
|
|
|
|
Value *Val0 = Builder.CreateExtractValue(Val, 1);
|
|
|
|
Value *Val1 = Builder.CreateExtractValue(Val, 0);
|
|
|
|
llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
|
|
|
|
Val0 = Builder.CreateZExt(Val0, Int128Ty);
|
|
|
|
Val1 = Builder.CreateZExt(Val1, Int128Ty);
|
|
|
|
|
|
|
|
Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
|
|
|
|
Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
|
|
|
|
Val = Builder.CreateOr(Val, Val1);
|
|
|
|
return Builder.CreateBitCast(Val, ConvertType(E->getType()));
|
2014-07-02 20:56:02 +08:00
|
|
|
} else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_ldaex) {
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *LoadAddr = EmitScalarExpr(E->getArg(0));
|
|
|
|
|
|
|
|
QualType Ty = E->getType();
|
|
|
|
llvm::Type *RealResTy = ConvertType(Ty);
|
|
|
|
llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
|
|
|
|
getContext().getTypeSize(Ty));
|
|
|
|
LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
|
|
|
|
? Intrinsic::aarch64_ldaxr
|
|
|
|
: Intrinsic::aarch64_ldxr,
|
|
|
|
LoadAddr->getType());
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
|
|
|
|
|
|
|
|
if (RealResTy->isPointerTy())
|
|
|
|
return Builder.CreateIntToPtr(Val, RealResTy);
|
|
|
|
|
|
|
|
Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
|
|
|
|
return Builder.CreateBitCast(Val, RealResTy);
|
|
|
|
}
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_stlex) &&
|
2014-03-29 23:09:45 +08:00
|
|
|
getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
|
|
|
|
? Intrinsic::aarch64_stlxp
|
|
|
|
: Intrinsic::aarch64_stxp);
|
2014-12-02 06:02:27 +08:00
|
|
|
llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
|
|
|
|
EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
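// stxp/stlxp take the 128-bit value as two 64-bit registers, so spill the
// argument to a temporary and reload it as a { i64, i64 } struct.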
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2015-09-08 16:05:57 +08:00
|
|
|
Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
|
|
|
|
llvm::Value *Val = Builder.CreateLoad(Tmp);
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
Value *Arg0 = Builder.CreateExtractValue(Val, 0);
|
|
|
|
Value *Arg1 = Builder.CreateExtractValue(Val, 1);
|
|
|
|
Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
|
|
|
|
Int8PtrTy);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_strex ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_stlex) {
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *StoreVal = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *StoreAddr = EmitScalarExpr(E->getArg(1));
|
|
|
|
|
|
|
|
QualType Ty = E->getArg(0)->getType();
|
|
|
|
llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
|
|
|
|
getContext().getTypeSize(Ty));
|
|
|
|
StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
|
|
|
|
|
|
|
|
if (StoreVal->getType()->isPointerTy())
|
|
|
|
StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
|
|
|
|
else {
|
|
|
|
StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
|
|
|
|
StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
|
|
|
|
}
|
|
|
|
|
2014-07-02 20:56:02 +08:00
|
|
|
Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
|
|
|
|
? Intrinsic::aarch64_stlxr
|
|
|
|
: Intrinsic::aarch64_stxr,
|
|
|
|
StoreAddr->getType());
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
|
2015-07-15 01:27:39 +08:00
|
|
|
return Builder.CreateCall(F);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
|
2015-07-28 21:10:10 +08:00
|
|
|
if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
|
|
|
|
return Builder.CreateCall(F);
|
|
|
|
}
|
|
|
|
|
2014-03-29 23:09:45 +08:00
|
|
|
// CRC32
|
|
|
|
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
|
|
|
|
switch (BuiltinID) {
|
2014-05-24 20:52:07 +08:00
|
|
|
case AArch64::BI__builtin_arm_crc32b:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32cb:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32h:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32ch:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32w:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32cw:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32d:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
|
|
|
|
case AArch64::BI__builtin_arm_crc32cd:
|
|
|
|
CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
|
|
|
|
Value *Arg0 = EmitScalarExpr(E->getArg(0));
|
|
|
|
Value *Arg1 = EmitScalarExpr(E->getArg(1));
|
|
|
|
Function *F = CGM.getIntrinsic(CRCIntrinsicID);
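// The CRC intrinsics take the data in an i32 for the byte/half/word forms
// and an i64 for the doubleword forms; widen the second argument to whatever
// the intrinsic expects.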
|
|
|
|
|
|
|
|
llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
|
|
|
|
Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
|
|
|
|
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Arg0, Arg1});
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
|
2015-06-16 01:51:01 +08:00
|
|
|
if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_rsrp ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_wsr ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_wsrp) {
|
|
|
|
|
|
|
|
bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_rsrp;
|
|
|
|
|
|
|
|
bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
|
|
|
|
BuiltinID == AArch64::BI__builtin_arm_wsrp;
|
|
|
|
|
|
|
|
bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
|
|
|
|
BuiltinID != AArch64::BI__builtin_arm_wsr;
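// On AArch64 the system register itself is always accessed as a 64-bit
// quantity; only the value type exposed to the caller differs.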
|
|
|
|
|
|
|
|
llvm::Type *ValueType;
|
|
|
|
llvm::Type *RegisterType = Int64Ty;
|
|
|
|
if (IsPointerBuiltin) {
|
|
|
|
ValueType = VoidPtrTy;
|
|
|
|
} else if (Is64Bit) {
|
|
|
|
ValueType = Int64Ty;
|
|
|
|
} else {
|
|
|
|
ValueType = Int32Ty;
|
|
|
|
}
|
|
|
|
|
|
|
|
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
|
|
|
|
}
|
|
|
|
|
2015-06-04 09:43:41 +08:00
|
|
|
// Find out if any arguments are required to be integer constant
|
|
|
|
// expressions.
|
|
|
|
unsigned ICEArguments = 0;
|
|
|
|
ASTContext::GetBuiltinTypeError Error;
|
|
|
|
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
|
|
|
|
assert(Error == ASTContext::GE_None && "Should not codegen an error");
|
|
|
|
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::SmallVector<Value*, 4> Ops;
|
2015-06-04 09:43:41 +08:00
|
|
|
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
|
|
|
|
if ((ICEArguments & (1 << i)) == 0) {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
|
|
|
} else {
|
|
|
|
// If this is required to be a constant, constant fold it so that we know
|
|
|
|
// that the generated intrinsic gets a ConstantInt.
|
|
|
|
llvm::APSInt Result;
|
|
|
|
bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
|
|
|
|
assert(IsConst && "Constant arg isn't actually constant?");
|
|
|
|
(void)IsConst;
|
|
|
|
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
|
|
|
|
}
|
|
|
|
}
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2014-08-27 14:28:36 +08:00
|
|
|
auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
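// Scalar (SISD) builtins are table-driven: if this one appears in the SISD
// map, emit it through the common SISD path.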
|
2014-03-29 23:09:45 +08:00
|
|
|
const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
|
2014-05-24 20:52:07 +08:00
|
|
|
SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
if (Builtin) {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
|
|
|
|
Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
|
|
|
|
assert(Result && "SISD intrinsic should have been handled");
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::APSInt Result;
|
|
|
|
const Expr *Arg = E->getArg(E->getNumArgs()-1);
|
|
|
|
NeonTypeFlags Type(0);
|
|
|
|
if (Arg->isIntegerConstantExpr(Result, getContext()))
|
|
|
|
// Determine the type of this overloaded NEON intrinsic.
|
|
|
|
Type = NeonTypeFlags(Result.getZExtValue());
|
|
|
|
|
|
|
|
bool usgn = Type.isUnsigned();
|
|
|
|
bool quad = Type.isQuad();
|
|
|
|
|
|
|
|
// Handle non-overloaded intrinsics first.
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: break;
|
2014-04-01 20:23:08 +08:00
|
|
|
case NEON::BI__builtin_neon_vldrq_p128: {
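// A poly128 value is loaded as a plain 128-bit integer through an i128
// pointer.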
|
|
|
|
llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
|
|
|
|
Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedLoad(Ptr);
|
2014-04-01 20:23:08 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vstrq_p128: {
|
|
|
|
llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
|
|
|
|
Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
|
2014-04-01 20:23:08 +08:00
|
|
|
}
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vcvts_u32_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcvtd_u64_f64:
|
|
|
|
usgn = true;
|
|
|
|
// FALL THROUGH
|
|
|
|
case NEON::BI__builtin_neon_vcvts_s32_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcvtd_s64_f64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
|
|
|
|
llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
|
|
|
|
llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
|
|
|
|
if (usgn)
|
|
|
|
return Builder.CreateFPToUI(Ops[0], InTy);
|
|
|
|
return Builder.CreateFPToSI(Ops[0], InTy);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvts_f32_u32:
|
|
|
|
case NEON::BI__builtin_neon_vcvtd_f64_u64:
|
|
|
|
usgn = true;
|
|
|
|
// FALL THROUGH
|
|
|
|
case NEON::BI__builtin_neon_vcvts_f32_s32:
|
|
|
|
case NEON::BI__builtin_neon_vcvtd_f64_s64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
|
|
|
|
llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
|
|
|
|
llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
|
|
|
|
if (usgn)
|
|
|
|
return Builder.CreateUIToFP(Ops[0], FTy);
|
|
|
|
return Builder.CreateSIToFP(Ops[0], FTy);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpaddd_s64: {
|
2015-07-28 23:40:11 +08:00
|
|
|
llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
// The vector is v2i64, so make sure it's bitcast to that.
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
|
|
|
|
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
|
|
|
|
Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
|
|
|
|
// Pairwise addition of a v2i64 into a scalar i64.
|
|
|
|
return Builder.CreateAdd(Op0, Op1, "vpaddd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpaddd_f64: {
|
|
|
|
llvm::Type *Ty =
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(DoubleTy, 2);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
// The vector is v2f64, so make sure it's bitcast to that.
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
|
|
|
|
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
|
|
|
|
Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
|
|
|
|
// Pairwise addition of a v2f64 into a scalar f64.
|
|
|
|
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpadds_f32: {
|
|
|
|
llvm::Type *Ty =
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(FloatTy, 2);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
// The vector is v2f32, so make sure it's bitcast to that.
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
|
|
|
|
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
|
|
|
|
Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
|
|
|
|
// Pairwise addition of a v2f32 into a scalar f32.
|
|
|
|
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vceqzd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vceqzd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vceqzs_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitAArch64CompareBuiltinExpr(
|
2015-02-26 01:36:15 +08:00
|
|
|
Ops[0], ConvertType(E->getCallReturnType(getContext())),
|
|
|
|
ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vcgezd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcgezd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcgezs_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitAArch64CompareBuiltinExpr(
|
2015-02-26 01:36:15 +08:00
|
|
|
Ops[0], ConvertType(E->getCallReturnType(getContext())),
|
|
|
|
ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vclezd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vclezd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vclezs_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitAArch64CompareBuiltinExpr(
|
2015-02-26 01:36:15 +08:00
|
|
|
Ops[0], ConvertType(E->getCallReturnType(getContext())),
|
|
|
|
ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vcgtzd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcgtzd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcgtzs_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitAArch64CompareBuiltinExpr(
|
2015-02-26 01:36:15 +08:00
|
|
|
Ops[0], ConvertType(E->getCallReturnType(getContext())),
|
|
|
|
ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vcltzd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcltzd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcltzs_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitAArch64CompareBuiltinExpr(
|
2015-02-26 01:36:15 +08:00
|
|
|
Ops[0], ConvertType(E->getCallReturnType(getContext())),
|
|
|
|
ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
case NEON::BI__builtin_neon_vceqzd_u64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
|
|
|
|
Ops[0] =
|
|
|
|
Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
|
|
|
|
return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vceqd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcled_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcged_f64:
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_f64: {
|
|
|
|
llvm::CmpInst::Predicate P;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("missing builtin ID in switch!");
|
|
|
|
case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
|
|
|
|
case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
|
|
|
|
}
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
|
|
|
|
Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
|
|
|
|
return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vceqs_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcles_f32:
|
|
|
|
case NEON::BI__builtin_neon_vclts_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcges_f32:
|
|
|
|
case NEON::BI__builtin_neon_vcgts_f32: {
|
|
|
|
llvm::CmpInst::Predicate P;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("missing builtin ID in switch!");
|
|
|
|
case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
|
|
|
|
case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
|
|
|
|
case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
|
|
|
|
}
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
|
|
|
|
Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
|
|
|
|
return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vceqd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vceqd_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcltd_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcged_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcged_s64:
|
|
|
|
case NEON::BI__builtin_neon_vcled_u64:
|
|
|
|
case NEON::BI__builtin_neon_vcled_s64: {
|
|
|
|
llvm::CmpInst::Predicate P;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("missing builtin ID in switch!");
|
|
|
|
case NEON::BI__builtin_neon_vceqd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
|
|
|
|
case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
|
|
|
|
case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
|
|
|
|
}
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
2014-03-31 23:47:09 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
|
2014-03-31 23:47:09 +08:00
|
|
|
return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vtstd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vtstd_u64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
|
|
|
|
Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
|
2015-07-28 23:40:11 +08:00
|
|
|
llvm::Constant::getNullValue(Int64Ty));
|
|
|
|
return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_f32:
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_f64:
|
|
|
|
// The vector type needs a cast for the v1f64 variant.
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1],
|
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_f64:
|
|
|
|
// The vector type needs a cast for the v2f64 variant.
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(DoubleTy, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vdupb_lane_i8:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i8:
|
|
|
|
case NEON::BI__builtin_neon_vdupb_laneq_i8:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vduph_lane_i16:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i16:
|
|
|
|
case NEON::BI__builtin_neon_vduph_laneq_i16:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vdups_lane_i32:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vdups_lane_f32:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(FloatTy, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vdups_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i32:
|
|
|
|
case NEON::BI__builtin_neon_vdups_laneq_i32:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vdupd_lane_i64:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vdupd_lane_f64:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vdupd_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i64:
|
|
|
|
case NEON::BI__builtin_neon_vdupd_laneq_i64:
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f32:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(FloatTy, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f64:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vget_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vdups_laneq_f32:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(FloatTy, 4));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f64:
|
|
|
|
case NEON::BI__builtin_neon_vdupd_laneq_f64:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
2015-08-25 07:47:29 +08:00
|
|
|
llvm::VectorType::get(DoubleTy, 2));
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
"vgetq_lane");
|
|
|
|
case NEON::BI__builtin_neon_vaddd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vaddd_u64:
|
|
|
|
return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
|
|
|
|
case NEON::BI__builtin_neon_vsubd_s64:
|
|
|
|
case NEON::BI__builtin_neon_vsubd_u64:
|
|
|
|
return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
|
|
|
|
case NEON::BI__builtin_neon_vqdmlalh_s16:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlslh_s16: {
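// There is no scalar sqdmull for 16-bit operands, so widen them to <4 x i16>,
// do a vector sqdmull, take lane 0 of the <4 x i32> result, and then do the
// saturating accumulate on the scalar.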
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
|
2014-05-24 20:52:07 +08:00
|
|
|
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
|
2014-03-29 23:09:45 +08:00
|
|
|
ProductOps, "vqdmlXl");
|
2014-05-31 08:22:12 +08:00
|
|
|
Constant *CI = ConstantInt::get(SizeTy, 0);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
|
|
|
|
|
|
|
|
unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_sqadd
|
|
|
|
: Intrinsic::aarch64_neon_sqsub;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqshlud_n_s64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
|
|
|
Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
|
2014-04-28 15:36:12 +08:00
|
|
|
Ops, "vqshlu_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqshld_n_u64:
|
|
|
|
case NEON::BI__builtin_neon_vqshld_n_s64: {
|
|
|
|
unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_uqshl
|
|
|
|
: Intrinsic::aarch64_neon_sqshl;
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
|
|
|
Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
|
2014-04-28 15:36:12 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrshrd_n_u64:
|
|
|
|
case NEON::BI__builtin_neon_vrshrd_n_s64: {
|
|
|
|
unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_urshl
|
|
|
|
: Intrinsic::aarch64_neon_srshl;
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
2014-04-28 15:36:12 +08:00
|
|
|
int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
|
|
|
|
Ops[1] = ConstantInt::get(Int64Ty, -SV);
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrsrad_n_u64:
|
|
|
|
case NEON::BI__builtin_neon_vrsrad_n_s64: {
|
|
|
|
unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_urshl
|
|
|
|
: Intrinsic::aarch64_neon_srshl;
|
2014-03-31 23:47:09 +08:00
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
|
|
|
|
Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
|
2015-05-19 06:14:03 +08:00
|
|
|
Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
|
|
|
|
{Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
|
2014-03-31 23:47:09 +08:00
|
|
|
return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vshld_n_s64:
|
|
|
|
case NEON::BI__builtin_neon_vshld_n_u64: {
|
|
|
|
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
|
|
|
|
return Builder.CreateShl(
|
2014-05-14 16:59:30 +08:00
|
|
|
Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vshrd_n_s64: {
|
|
|
|
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
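// LLVM IR leaves a 64-bit shift by 64 undefined; clamping the amount to 63
// still yields the intended result (all copies of the sign bit) for an
// arithmetic right shift.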
|
|
|
|
return Builder.CreateAShr(
|
|
|
|
Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
|
|
|
|
Amt->getZExtValue())),
|
2014-05-14 16:59:30 +08:00
|
|
|
"shrd_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vshrd_n_u64: {
|
|
|
|
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
|
2014-05-14 16:59:30 +08:00
|
|
|
uint64_t ShiftAmt = Amt->getZExtValue();
|
|
|
|
// Right-shifting an unsigned value by its size yields 0.
|
|
|
|
if (ShiftAmt == 64)
|
|
|
|
return ConstantInt::get(Int64Ty, 0);
|
|
|
|
return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
|
|
|
|
"shrd_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsrad_n_s64: {
|
|
|
|
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
|
|
|
|
Ops[1] = Builder.CreateAShr(
|
|
|
|
Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
|
|
|
|
Amt->getZExtValue())),
|
2014-05-14 16:59:30 +08:00
|
|
|
"shrd_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateAdd(Ops[0], Ops[1]);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsrad_n_u64: {
|
|
|
|
llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
|
2014-05-14 16:59:30 +08:00
|
|
|
uint64_t ShiftAmt = Amt->getZExtValue();
|
|
|
|
// Right-shifting an unsigned value by its size yields 0.
|
|
|
|
// As Op + 0 = Op, return Ops[0] directly.
|
|
|
|
if (ShiftAmt == 64)
|
|
|
|
return Ops[0];
|
|
|
|
Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
|
|
|
|
"shrd_n");
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateAdd(Ops[0], Ops[1]);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
|
|
|
|
Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
|
|
|
|
"lane");
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[2]));
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
|
2014-05-24 20:52:07 +08:00
|
|
|
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
|
2014-03-29 23:09:45 +08:00
|
|
|
ProductOps, "vqdmlXl");
|
2014-05-31 08:22:12 +08:00
|
|
|
Constant *CI = ConstantInt::get(SizeTy, 0);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
|
|
|
|
Ops.pop_back();
|
|
|
|
|
|
|
|
unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
|
|
|
|
BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_sqadd
|
|
|
|
: Intrinsic::aarch64_neon_sqsub;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqdmlals_s32:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlsls_s32: {
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
ProductOps.push_back(Ops[1]);
|
|
|
|
ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
Ops[1] =
|
2014-05-24 20:52:07 +08:00
|
|
|
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
|
2014-03-29 23:09:45 +08:00
|
|
|
ProductOps, "vqdmlXl");
|
|
|
|
|
|
|
|
unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_sqadd
|
|
|
|
: Intrinsic::aarch64_neon_sqsub;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqdmlals_lane_s32:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
|
|
|
|
case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
|
|
|
|
Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
|
|
|
|
"lane");
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
ProductOps.push_back(Ops[1]);
|
|
|
|
ProductOps.push_back(Ops[2]);
|
|
|
|
Ops[1] =
|
2014-05-24 20:52:07 +08:00
|
|
|
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
|
2014-03-29 23:09:45 +08:00
|
|
|
ProductOps, "vqdmlXl");
|
|
|
|
Ops.pop_back();
|
|
|
|
|
|
|
|
unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
|
|
|
|
BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
|
2014-05-24 20:52:07 +08:00
|
|
|
? Intrinsic::aarch64_neon_sqadd
|
|
|
|
: Intrinsic::aarch64_neon_sqsub;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::VectorType *VTy = GetNeonType(this, Type);
|
|
|
|
llvm::Type *Ty = VTy;
|
|
|
|
if (!Ty)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
// Not all intrinsics handled by the common case work for AArch64 yet, so only
|
2014-03-29 23:09:45 +08:00
|
|
|
// defer to common code if it's been added to our special map.
|
2014-05-24 20:52:07 +08:00
|
|
|
Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
|
|
|
|
AArch64SIMDIntrinsicsProvenSorted);
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
if (Builtin)
|
|
|
|
return EmitCommonNeonBuiltinExpr(
|
|
|
|
Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
|
2015-09-08 16:05:57 +08:00
|
|
|
Builtin->NameHint, Builtin->TypeModifier, E, Ops,
|
|
|
|
/*never use addresses*/ Address::invalid(), Address::invalid());
|
2014-03-29 23:09:45 +08:00
|
|
|
|
2014-05-24 20:52:07 +08:00
|
|
|
if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
|
2014-03-29 23:09:45 +08:00
|
|
|
return V;
|
|
|
|
|
|
|
|
unsigned Int;
|
|
|
|
switch (BuiltinID) {
|
2014-05-21 13:09:00 +08:00
|
|
|
default: return nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vbsl_v:
|
|
|
|
case NEON::BI__builtin_neon_vbslq_v: {
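    // Bitwise select: for each bit, result = (mask & a) | (~mask & b).
    // Work on the integer form of the vectors and bitcast back at the end.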
|
|
|
|
llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
|
|
|
|
|
|
|
|
Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
|
|
|
|
Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
|
|
|
|
Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
|
|
|
|
return Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vfma_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
|
|
|
|
// The ARM builtins (and instructions) have the addend as the first
|
|
|
|
// operand, but the 'fma' intrinsics have it last. Swap it around here.
|
|
|
|
Value *Addend = Ops[0];
|
|
|
|
Value *Multiplicand = Ops[1];
|
|
|
|
Value *LaneSource = Ops[2];
|
|
|
|
Ops[0] = Multiplicand;
|
|
|
|
Ops[1] = LaneSource;
|
|
|
|
Ops[2] = Addend;
|
|
|
|
|
|
|
|
// Now adjust things to handle the lane access.
|
|
|
|
llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
|
|
|
|
llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
|
|
|
|
VTy;
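    // Build a constant vector of the lane index and use it as a shuffle mask
    // to broadcast the chosen lane across the multiplicand vector.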
|
|
|
|
llvm::Constant *cst = cast<Constant>(Ops[3]);
|
|
|
|
Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
|
|
|
|
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
|
|
|
|
|
|
|
|
Ops.pop_back();
|
|
|
|
Int = Intrinsic::fma;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vfma_laneq_v: {
|
|
|
|
llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
|
|
|
|
// v1f64 fma should be mapped to Neon scalar f64 fma
|
|
|
|
if (VTy && VTy->getElementType() == DoubleTy) {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
|
|
|
|
llvm::Type *VTy = GetNeonType(this,
|
|
|
|
NeonTypeFlags(NeonTypeFlags::Float64, false, true));
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
|
|
|
|
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
|
2015-05-19 06:14:03 +08:00
|
|
|
Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateBitCast(Result, Ty);
|
|
|
|
}
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
|
|
|
|
llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
|
|
|
|
VTy->getNumElements() * 2);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], STy);
|
|
|
|
Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
|
|
|
|
cast<ConstantInt>(Ops[3]));
|
|
|
|
Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
|
|
|
|
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vfmaq_laneq_v: {
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
|
|
|
Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vfmas_lane_f32:
|
|
|
|
case NEON::BI__builtin_neon_vfmas_laneq_f32:
|
|
|
|
case NEON::BI__builtin_neon_vfmad_lane_f64:
|
|
|
|
case NEON::BI__builtin_neon_vfmad_laneq_f64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(3)));
|
2015-02-26 01:36:15 +08:00
|
|
|
llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
|
2014-03-29 23:09:45 +08:00
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
|
|
|
|
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vfms_v:
|
|
|
|
case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
|
|
|
|
// FIXME: probably remove when we no longer support aarch64_simd.h
|
|
|
|
// (arm_neon.h delegates to vfma).
|
|
|
|
|
|
|
|
// The ARM builtins (and instructions) have the addend as the first
|
|
|
|
// operand, but the 'fma' intrinsics have it last. Swap it around here.
|
|
|
|
Value *Subtrahend = Ops[0];
|
|
|
|
Value *Multiplicand = Ops[2];
|
|
|
|
Ops[0] = Multiplicand;
|
|
|
|
Ops[2] = Subtrahend;
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
|
|
|
|
Ops[1] = Builder.CreateFNeg(Ops[1]);
|
|
|
|
Int = Intrinsic::fma;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmull_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
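    // Widening multiply: pick the unsigned, signed or polynomial variant from
    // the type flags.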
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
|
|
|
|
if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
|
|
|
|
case NEON::BI__builtin_neon_vmax_v:
|
|
|
|
case NEON::BI__builtin_neon_vmaxq_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
|
|
|
|
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
|
|
|
|
case NEON::BI__builtin_neon_vmin_v:
|
|
|
|
case NEON::BI__builtin_neon_vminq_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
|
|
|
|
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
|
|
|
|
case NEON::BI__builtin_neon_vabd_v:
|
|
|
|
case NEON::BI__builtin_neon_vabdq_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
|
|
|
|
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
|
|
|
|
case NEON::BI__builtin_neon_vpadal_v:
|
|
|
|
case NEON::BI__builtin_neon_vpadalq_v: {
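    // Pairwise add and accumulate: widen the source with [us]addlp, then add
    // the result to the accumulator operand.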
|
|
|
|
unsigned ArgElts = VTy->getNumElements();
|
|
|
|
llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
|
|
|
|
unsigned BitWidth = EltTy->getBitWidth();
|
|
|
|
llvm::Type *ArgTy = llvm::VectorType::get(
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
|
|
|
|
llvm::Type* Tys[2] = { VTy, ArgTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
|
2014-03-29 23:09:45 +08:00
|
|
|
SmallVector<llvm::Value*, 1> TmpOps;
|
|
|
|
TmpOps.push_back(Ops[1]);
|
|
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
|
|
llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
|
|
|
|
llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
|
|
|
|
return Builder.CreateAdd(tmp, addend);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpmin_v:
|
|
|
|
case NEON::BI__builtin_neon_vpminq_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
|
|
|
|
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
|
|
|
|
case NEON::BI__builtin_neon_vpmax_v:
|
|
|
|
case NEON::BI__builtin_neon_vpmaxq_v:
|
|
|
|
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
|
|
|
|
if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
|
|
|
|
case NEON::BI__builtin_neon_vminnm_v:
|
|
|
|
case NEON::BI__builtin_neon_vminnmq_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_fminnm;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
|
|
|
|
case NEON::BI__builtin_neon_vmaxnm_v:
|
|
|
|
case NEON::BI__builtin_neon_vmaxnmq_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_fmaxnm;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
|
|
|
|
case NEON::BI__builtin_neon_vrecpss_f32: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
2015-08-25 07:47:29 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vrecps");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrecpsd_f64: {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(1)));
|
2015-08-25 07:47:29 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vrecps");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqshrun_n_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sqshrun;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
|
|
|
|
case NEON::BI__builtin_neon_vqrshrun_n_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sqrshrun;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
|
|
|
|
case NEON::BI__builtin_neon_vqshrn_n_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
|
|
|
|
case NEON::BI__builtin_neon_vrshrn_n_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_rshrn;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
|
|
|
|
case NEON::BI__builtin_neon_vqrshrn_n_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
|
|
|
|
case NEON::BI__builtin_neon_vrnda_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndaq_v: {
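    // vrnda rounds to the nearest integral value with ties away from zero,
    // which matches the semantics of llvm.round.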
|
|
|
|
Int = Intrinsic::round;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrndi_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndiq_v: {
|
|
|
|
Int = Intrinsic::nearbyint;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrndm_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndmq_v: {
|
|
|
|
Int = Intrinsic::floor;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrndn_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndnq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_frintn;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrndp_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndpq_v: {
|
|
|
|
Int = Intrinsic::ceil;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrndx_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndxq_v: {
|
|
|
|
Int = Intrinsic::rint;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrnd_v:
|
|
|
|
case NEON::BI__builtin_neon_vrndq_v: {
|
|
|
|
Int = Intrinsic::trunc;
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vceqz_v:
|
|
|
|
case NEON::BI__builtin_neon_vceqzq_v:
|
|
|
|
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
|
|
|
|
ICmpInst::ICMP_EQ, "vceqz");
|
|
|
|
case NEON::BI__builtin_neon_vcgez_v:
|
|
|
|
case NEON::BI__builtin_neon_vcgezq_v:
|
|
|
|
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
|
|
|
|
ICmpInst::ICMP_SGE, "vcgez");
|
|
|
|
case NEON::BI__builtin_neon_vclez_v:
|
|
|
|
case NEON::BI__builtin_neon_vclezq_v:
|
|
|
|
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
|
|
|
|
ICmpInst::ICMP_SLE, "vclez");
|
|
|
|
case NEON::BI__builtin_neon_vcgtz_v:
|
|
|
|
case NEON::BI__builtin_neon_vcgtzq_v:
|
|
|
|
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
|
|
|
|
ICmpInst::ICMP_SGT, "vcgtz");
|
|
|
|
case NEON::BI__builtin_neon_vcltz_v:
|
|
|
|
case NEON::BI__builtin_neon_vcltzq_v:
|
|
|
|
return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
|
|
|
|
ICmpInst::ICMP_SLT, "vcltz");
|
|
|
|
case NEON::BI__builtin_neon_vcvt_f64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_f64_v:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
|
|
|
|
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
|
|
|
|
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
|
|
|
|
case NEON::BI__builtin_neon_vcvt_f64_f32: {
|
|
|
|
assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
|
|
|
|
"unexpected vcvt_f64_f32 builtin");
|
|
|
|
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
|
|
|
|
|
|
|
|
return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvt_f32_f64: {
|
|
|
|
assert(Type.getEltType() == NeonTypeFlags::Float32 &&
|
|
|
|
"unexpected vcvt_f32_f64 builtin");
|
|
|
|
NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
|
|
|
|
|
|
|
|
return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvt_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvt_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtq_u64_v: {
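    // Plain conversions to integer: bitcast the operand to the matching
    // floating-point vector and lower directly to fptoui/fptosi.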
|
2015-08-25 07:41:31 +08:00
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
|
2014-03-29 23:09:45 +08:00
|
|
|
if (usgn)
|
|
|
|
return Builder.CreateFPToUI(Ops[0], Ty);
|
|
|
|
return Builder.CreateFPToSI(Ops[0], Ty);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvta_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvta_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtaq_u64_v: {
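    // fcvta[su] converts with round-to-nearest, ties away from zero; the
    // intrinsic is overloaded on both the integer result type and the
    // floating-point source type.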
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtm_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtmq_u64_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtn_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtnq_u64_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_s32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_u32_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_s64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtp_u64_v:
|
|
|
|
case NEON::BI__builtin_neon_vcvtpq_u64_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
|
2015-08-25 07:41:31 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmulx_v:
|
|
|
|
case NEON::BI__builtin_neon_vmulxq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_fmulx;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmul_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vmul_laneq_v: {
|
|
|
|
// v1f64 vmul_lane should be mapped to Neon scalar mul lane
|
|
|
|
bool Quad = false;
|
|
|
|
if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
|
|
|
|
Quad = true;
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
|
|
|
|
llvm::Type *VTy = GetNeonType(this,
|
|
|
|
NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
|
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
|
|
|
|
Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
|
|
|
|
return Builder.CreateBitCast(Result, Ty);
|
|
|
|
}
|
2014-03-31 23:47:09 +08:00
|
|
|
case NEON::BI__builtin_neon_vnegd_s64:
|
|
|
|
return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vpmaxnm_v:
|
|
|
|
case NEON::BI__builtin_neon_vpmaxnmq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_fmaxnmp;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vpminnm_v:
|
|
|
|
case NEON::BI__builtin_neon_vpminnmq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_fminnmp;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsqrt_v:
|
|
|
|
case NEON::BI__builtin_neon_vsqrtq_v: {
|
|
|
|
Int = Intrinsic::sqrt;
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vrbit_v:
|
|
|
|
case NEON::BI__builtin_neon_vrbitq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_rbit;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddv_u8:
|
|
|
|
// FIXME: These are handled by the AArch64 scalar code.
|
|
|
|
usgn = true;
|
|
|
|
// FALLTHROUGH
|
|
|
|
case NEON::BI__builtin_neon_vaddv_s8: {
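    // The across-vector add intrinsic returns an i32 regardless of element
    // width, so truncate the result back to the 8-bit element type.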
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddv_u16:
|
|
|
|
usgn = true;
|
|
|
|
// FALLTHROUGH
|
|
|
|
case NEON::BI__builtin_neon_vaddv_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_u8:
|
|
|
|
usgn = true;
|
|
|
|
// FALLTHROUGH
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_u16:
|
|
|
|
usgn = true;
|
|
|
|
// FALLTHROUGH
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_u8: {
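    // As with vaddv, the across-vector max/min reductions return an i32 that
    // is truncated back down to the element type.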
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_u8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminv_u8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminv_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminvq_u8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminvq_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminv_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminv_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminvq_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int8Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vminvq_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vmul_n_f64: {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
|
|
|
|
Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
|
|
|
|
return Builder.CreateFMul(Ops[0], RHS);
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_u8: {
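    // Widening add across vector: the 8-bit elements reduce to a 16-bit sum,
    // which the intrinsic returns as i32 and we truncate to i16.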
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_u8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_u16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_s8: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
2015-07-28 23:40:11 +08:00
|
|
|
return Builder.CreateTrunc(Ops[0], Int16Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_s16: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
2015-07-28 23:40:11 +08:00
|
|
|
Ty = Int32Ty;
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsri_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsriq_n_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_vsri;
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
|
|
|
|
return EmitNeonCall(Intrin, Ops, "vsri_n");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsli_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsliq_n_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_vsli;
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
|
|
|
|
return EmitNeonCall(Intrin, Ops, "vsli_n");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsra_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vsraq_n_v:
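    // Shift-right-accumulate: perform the immediate right shift on the shifted
    // operand, then add it to the accumulator.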
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
|
|
|
|
return Builder.CreateAdd(Ops[0], Ops[1]);
|
|
|
|
case NEON::BI__builtin_neon_vrsra_n_v:
|
|
|
|
case NEON::BI__builtin_neon_vrsraq_n_v: {
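    // Rounding shift-right-accumulate: the shift amount is splatted and
    // negated so the rounding right shift can be expressed with [us]rshl, and
    // the result is then added to the accumulator.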
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
|
2014-03-29 23:09:45 +08:00
|
|
|
SmallVector<llvm::Value*,2> TmpOps;
|
|
|
|
TmpOps.push_back(Ops[1]);
|
|
|
|
TmpOps.push_back(Ops[2]);
|
|
|
|
Function* F = CGM.getIntrinsic(Int, Ty);
|
|
|
|
llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
|
|
|
|
return Builder.CreateAdd(Ops[0], tmp);
|
|
|
|
}
|
|
|
|
// FIXME: Sharing loads & stores with 32-bit is complicated by the absence
|
|
|
|
// of an Align parameter here.
|
|
|
|
case NEON::BI__builtin_neon_vld1_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1_x4_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x4_v: {
|
|
|
|
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
|
|
|
unsigned Int;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
case NEON::BI__builtin_neon_vld1_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x2_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_ld1x2;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
case NEON::BI__builtin_neon_vld1_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x3_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_ld1x3;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
case NEON::BI__builtin_neon_vld1_x4_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_x4_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_ld1x4;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
|
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst1_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1_x4_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x4_v: {
|
|
|
|
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
|
|
|
unsigned Int;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
case NEON::BI__builtin_neon_vst1_x2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x2_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_st1x2;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
case NEON::BI__builtin_neon_vst1_x3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x3_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_st1x3;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
case NEON::BI__builtin_neon_vst1_x4_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_x4_v:
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_st1x4;
|
2014-03-29 23:09:45 +08:00
|
|
|
break;
|
|
|
|
}
|
2015-07-28 23:40:11 +08:00
|
|
|
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld1_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_v:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedLoad(Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vst1_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_v:
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld1_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_lane_v:
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
|
|
|
|
case NEON::BI__builtin_neon_vld1_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld1q_dup_v: {
|
|
|
|
Value *V = UndefValue::get(Ty);
|
|
|
|
Ty = llvm::PointerType::getUnqual(VTy->getElementType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
|
2014-06-03 03:48:59 +08:00
|
|
|
llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
|
|
|
|
return EmitNeonSplat(Ops[0], CI);
|
|
|
|
}
|
|
|
|
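// vst1_lane / vst1q_lane: extract the requested lane as a scalar and store it
// through an element-typed pointer, e.g. vst1q_lane_u32(p, v, 3) stores v[3] to *p.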
case NEON::BI__builtin_neon_vst1_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst1q_lane_v:
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
|
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1],
|
|
|
|
Builder.CreateBitCast(Ops[0], Ty));
|
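// vld2/vld3/vld4 (and their q forms) lower to the aarch64_neon_ldN intrinsics,
// which return an aggregate of N vectors; the aggregate is then stored whole
// through the result pointer passed in Ops[0]. For vld2q_u32 the emitted IR is
// shaped roughly like this (overload suffixes and alignment elided,
// illustrative only):
//   %vld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2(<4 x i32>* %p)
//   store { <4 x i32>, <4 x i32> } %vld2, { <4 x i32>, <4 x i32> }* %agg.result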
2014-03-29 23:09:45 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2q_v: {
|
|
|
|
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld3_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3q_v: {
|
|
|
|
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld4_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_v: {
|
|
|
|
llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
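// The _dup ("load and replicate") variants below use the aarch64_neon_ldNr
// intrinsics; note the pointer is cast to a pointer to a single element rather
// than to the whole vector type. ACLE-level example (illustrative):
//   uint32x4x2_t r = vld2q_dup_u32(p);  // each result register is a splat of one loaded element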
2014-04-01 18:37:47 +08:00
|
|
|
case NEON::BI__builtin_neon_vld2_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2q_dup_v: {
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *PTy =
|
|
|
|
llvm::PointerType::getUnqual(VTy->getElementType());
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
2014-04-01 18:37:47 +08:00
|
|
|
case NEON::BI__builtin_neon_vld3_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3q_dup_v: {
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *PTy =
|
|
|
|
llvm::PointerType::getUnqual(VTy->getElementType());
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
2014-04-01 18:37:47 +08:00
|
|
|
case NEON::BI__builtin_neon_vld4_dup_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_dup_v: {
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *PTy =
|
|
|
|
llvm::PointerType::getUnqual(VTy->getElementType());
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
|
|
|
|
llvm::Type *Tys[2] = { VTy, PTy };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()));
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
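// The _lane variants also receive the existing vector tuple. The pointer is
// moved from slot 1 to the end of the operand list (the aarch64_neon_ldNlane
// intrinsics take the address last), the incoming vectors are bitcast to the
// overloaded vector type, and the lane index is zero-extended to i64.
// Illustrative ACLE usage:
//   uint32x4x2_t r = vld2q_lane_u32(p, src, 1);  // reload only lane 1 of each register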
case NEON::BI__builtin_neon_vld2_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld2q_lane_v: {
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(Ops[1]);
|
|
|
|
Ops.erase(Ops.begin()+1);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
|
2014-08-27 14:28:36 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
|
2014-03-29 23:09:45 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld3_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld3q_lane_v: {
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(Ops[1]);
|
|
|
|
Ops.erase(Ops.begin()+1);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
|
|
|
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
|
2014-08-27 14:28:36 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
|
2014-03-29 23:09:45 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vld4_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vld4q_lane_v: {
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops.push_back(Ops[1]);
|
|
|
|
Ops.erase(Ops.begin()+1);
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
|
|
|
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
|
|
|
|
Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
|
2014-08-27 14:28:36 +08:00
|
|
|
Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
|
2014-03-29 23:09:45 +08:00
|
|
|
Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
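// For the interleaving stores, the destination pointer in Ops[0] is rotated to
// the end of the operand list because the aarch64_neon_stN / stNlane intrinsics
// take the address as their final argument; the _lane forms additionally
// zero-extend the lane index to i64. E.g. vst2q_u32(p, v) stores the two
// registers interleaved as v0[0], v1[0], v0[1], v1[1], ...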
case NEON::BI__builtin_neon_vst2_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst2_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst2q_lane_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst3_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst3_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst3q_lane_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst4_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4q_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vst4_lane_v:
|
|
|
|
case NEON::BI__builtin_neon_vst4q_lane_v: {
|
|
|
|
Ops.push_back(Ops[0]);
|
|
|
|
Ops.erase(Ops.begin());
|
2015-07-28 23:40:11 +08:00
|
|
|
Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
|
2014-03-29 23:09:45 +08:00
|
|
|
llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "");
|
|
|
|
}
|
|
|
|
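// vtrn/vuzp/vzip produce two result vectors, so the loop in each case below
// runs twice: every iteration builds a shufflevector mask, applies it to the
// two inputs, and stores the shuffled vector into result slot vi through a GEP
// off Ops[0]. Worked example of the vtrn masks for a 4-element vector (e = 4):
//   vi = 0 -> indices 0, 4, 2, 6   (even lanes of both inputs, interleaved)
//   vi = 1 -> indices 1, 5, 3, 7   (odd lanes of both inputs, interleaved)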
case NEON::BI__builtin_neon_vtrn_v:
|
|
|
|
case NEON::BI__builtin_neon_vtrnq_v: {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
|
|
|
|
}
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-03-29 23:09:45 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vuzp_v:
|
|
|
|
case NEON::BI__builtin_neon_vuzpq_v: {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
|
|
|
|
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-03-29 23:09:45 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vzip_v:
|
|
|
|
case NEON::BI__builtin_neon_vzipq_v: {
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
|
2014-05-21 13:09:00 +08:00
|
|
|
Value *SV = nullptr;
|
2014-03-29 23:09:45 +08:00
|
|
|
|
|
|
|
for (unsigned vi = 0; vi != 2; ++vi) {
|
|
|
|
SmallVector<Constant*, 16> Indices;
|
|
|
|
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
|
|
|
|
Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
|
|
|
|
}
|
2015-04-04 23:12:29 +08:00
|
|
|
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
|
2014-03-29 23:09:45 +08:00
|
|
|
SV = llvm::ConstantVector::get(Indices);
|
|
|
|
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
|
2015-09-08 16:05:57 +08:00
|
|
|
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
|
2014-03-29 23:09:45 +08:00
|
|
|
}
|
|
|
|
return SV;
|
|
|
|
}
|
|
|
|
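// The vqtbl / vqtbx builtins map one-to-one onto the aarch64_neon_tblN / tbxN
// table-lookup intrinsics; the tbx ("extension") forms carry an extra vector
// whose lanes are preserved wherever an index is out of range. ACLE example
// (illustrative): uint8x16_t r = vqtbl1q_u8(table, idx);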
case NEON::BI__builtin_neon_vqtbl1q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbl1");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbl2q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbl2");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbl3q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbl3");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbl4q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbl4");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbx1q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbx1");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbx2q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbx2");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbx3q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbx3");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vqtbx4q_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
|
2014-03-29 23:09:45 +08:00
|
|
|
Ops, "vtbx4");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vsqadd_v:
|
|
|
|
case NEON::BI__builtin_neon_vsqaddq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_usqadd;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
|
|
|
|
}
|
|
|
|
case NEON::BI__builtin_neon_vuqadd_v:
|
|
|
|
case NEON::BI__builtin_neon_vuqaddq_v: {
|
2014-05-24 20:52:07 +08:00
|
|
|
Int = Intrinsic::aarch64_neon_suqadd;
|
2014-03-29 23:09:45 +08:00
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
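// Illustrative only, not part of the original file: given how many
// CreateDefaultAligned{Load,Store} calls remain in the NEON cases above, this
// is a minimal sketch of the Address-based replacement the commit note asks
// for. The helper name is hypothetical; Address, CharUnits and
// CGBuilderTy::CreateLoad are the same APIs already used elsewhere in this file.
static llvm::Value *emitLoadWithKnownAlignment(CodeGenFunction &CGF,
                                               llvm::Value *Ptr,
                                               CharUnits Align) {
  Address Src(Ptr, Align);            // bundle the pointer with a non-zero alignment
  return CGF.Builder.CreateLoad(Src); // the emitted load carries that alignment
}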
2010-10-09 16:47:25 +08:00
|
|
|
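// BuildVector: if every operand is already a Constant, fold the whole thing
// into a ConstantVector; otherwise emit a chain of insertelement instructions
// over an undef vector. Hypothetical usage (illustrative only):
//   llvm::Value *Elts[] = {A, B, C, D};   // operand count must be a power of two
//   llvm::Value *Vec = BuildVector(Elts); // a single ConstantVector if A..D are constants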
llvm::Value *CodeGenFunction::
|
2012-02-22 17:30:11 +08:00
|
|
|
BuildVector(ArrayRef<llvm::Value*> Ops) {
|
2010-10-09 16:47:25 +08:00
|
|
|
assert((Ops.size() & (Ops.size() - 1)) == 0 &&
|
|
|
|
"Not a power-of-two sized vector!");
|
|
|
|
bool AllConstants = true;
|
|
|
|
for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
|
|
|
|
AllConstants &= isa<Constant>(Ops[i]);
|
|
|
|
|
|
|
|
// If this is a constant vector, create a ConstantVector.
|
|
|
|
if (AllConstants) {
|
2012-01-25 13:34:41 +08:00
|
|
|
SmallVector<llvm::Constant*, 16> CstOps;
|
2010-10-09 16:47:25 +08:00
|
|
|
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
|
|
|
|
CstOps.push_back(cast<Constant>(Ops[i]));
|
|
|
|
return llvm::ConstantVector::get(CstOps);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, insertelement the values to build the vector.
|
|
|
|
Value *Result =
|
|
|
|
llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
|
2012-01-25 13:34:41 +08:00
|
|
|
Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
|
2010-10-09 16:47:25 +08:00
|
|
|
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
2007-12-13 15:34:23 +08:00
|
|
|
const CallExpr *E) {
|
2015-09-18 04:55:33 +08:00
|
|
|
if (BuiltinID == X86::BI__builtin_ms_va_start ||
|
|
|
|
BuiltinID == X86::BI__builtin_ms_va_end)
|
|
|
|
return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
|
|
|
|
BuiltinID == X86::BI__builtin_ms_va_start);
|
|
|
|
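// On Win64 a va_list is just a char* cursor into the argument area, so
// __builtin_ms_va_copy lowers to a load of the source slot plus a store to the
// destination slot, as done below. Illustrative C-level usage:
//   __builtin_ms_va_list Dst; __builtin_ms_va_copy(Dst, Src);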
if (BuiltinID == X86::BI__builtin_ms_va_copy) {
|
|
|
|
// Lower this manually. We can't reliably determine whether or not any
|
|
|
|
// given va_copy() is for a Win64 va_list from the calling convention
|
|
|
|
// alone, because it's legal to do this from a System V ABI function.
|
|
|
|
// With opaque pointer types, we won't have enough information in LLVM
|
|
|
|
// IR to determine this from the argument types, either. Best to do it
|
|
|
|
// now, while we have enough information.
|
|
|
|
Address DestAddr = EmitMSVAListRef(E->getArg(0));
|
|
|
|
Address SrcAddr = EmitMSVAListRef(E->getArg(1));
|
|
|
|
|
|
|
|
llvm::Type *BPP = Int8PtrPtrTy;
|
|
|
|
|
|
|
|
DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
|
|
|
|
DestAddr.getAlignment());
|
|
|
|
SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
|
|
|
|
SrcAddr.getAlignment());
|
|
|
|
|
|
|
|
Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
|
|
|
|
return Builder.CreateStore(ArgPtr, DestAddr);
|
|
|
|
}
|
|
|
|
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<Value*, 4> Ops;
|
2007-12-15 01:48:24 +08:00
|
|
|
|
2010-10-02 08:09:12 +08:00
|
|
|
// Find out if any arguments are required to be integer constant expressions.
|
|
|
|
unsigned ICEArguments = 0;
|
|
|
|
ASTContext::GetBuiltinTypeError Error;
|
|
|
|
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
|
|
|
|
assert(Error == ASTContext::GE_None && "Should not codegen an error");
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
|
|
|
|
// If this is a normal argument, just emit it as a scalar.
|
|
|
|
if ((ICEArguments & (1 << i)) == 0) {
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If this is required to be a constant, constant fold it so that we know
|
|
|
|
// that the generated intrinsic gets a ConstantInt.
|
|
|
|
llvm::APSInt Result;
|
|
|
|
bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
|
|
|
|
assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
|
2011-02-08 16:22:06 +08:00
|
|
|
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
|
2010-10-02 08:09:12 +08:00
|
|
|
}
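// Illustrative note: for a builtin whose operand is marked as requiring an
// integer constant expression (for example the byte-shift operand of
// __builtin_ia32_palignr128, which is cast<llvm::ConstantInt> below), Sema
// has already diagnosed non-constant arguments, so the fold above cannot
// fail and the intrinsic receives a ConstantInt.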
|
2007-12-15 01:48:24 +08:00
|
|
|
|
2007-12-10 07:17:02 +08:00
|
|
|
switch (BuiltinID) {
|
2014-05-21 13:09:00 +08:00
|
|
|
default: return nullptr;
|
2015-06-30 05:00:05 +08:00
|
|
|
case X86::BI__builtin_cpu_supports: {
|
|
|
|
const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
|
|
|
|
StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
|
|
|
|
|
|
|
|
// TODO: When/if this becomes more than x86-specific, use a TargetInfo
|
|
|
|
// based mapping.
|
|
|
|
// Processor features and mapping to processor feature value.
|
|
|
|
enum X86Features {
|
|
|
|
CMOV = 0,
|
|
|
|
MMX,
|
|
|
|
POPCNT,
|
|
|
|
SSE,
|
|
|
|
SSE2,
|
|
|
|
SSE3,
|
|
|
|
SSSE3,
|
|
|
|
SSE4_1,
|
|
|
|
SSE4_2,
|
|
|
|
AVX,
|
|
|
|
AVX2,
|
|
|
|
SSE4_A,
|
|
|
|
FMA4,
|
|
|
|
XOP,
|
|
|
|
FMA,
|
|
|
|
AVX512F,
|
|
|
|
BMI,
|
|
|
|
BMI2,
|
|
|
|
MAX
|
|
|
|
};
|
|
|
|
|
|
|
|
X86Features Feature = StringSwitch<X86Features>(FeatureStr)
|
|
|
|
.Case("cmov", X86Features::CMOV)
|
|
|
|
.Case("mmx", X86Features::MMX)
|
|
|
|
.Case("popcnt", X86Features::POPCNT)
|
|
|
|
.Case("sse", X86Features::SSE)
|
|
|
|
.Case("sse2", X86Features::SSE2)
|
|
|
|
.Case("sse3", X86Features::SSE3)
|
|
|
|
.Case("sse4.1", X86Features::SSE4_1)
|
|
|
|
.Case("sse4.2", X86Features::SSE4_2)
|
|
|
|
.Case("avx", X86Features::AVX)
|
|
|
|
.Case("avx2", X86Features::AVX2)
|
|
|
|
.Case("sse4a", X86Features::SSE4_A)
|
|
|
|
.Case("fma4", X86Features::FMA4)
|
|
|
|
.Case("xop", X86Features::XOP)
|
|
|
|
.Case("fma", X86Features::FMA)
|
|
|
|
.Case("avx512f", X86Features::AVX512F)
|
|
|
|
.Case("bmi", X86Features::BMI)
|
|
|
|
.Case("bmi2", X86Features::BMI2)
|
|
|
|
.Default(X86Features::MAX);
|
|
|
|
assert(Feature != X86Features::MAX && "Invalid feature!");
|
|
|
|
|
|
|
|
// Matching the struct layout from the compiler-rt/libgcc structure that is
|
|
|
|
// filled in:
|
|
|
|
// unsigned int __cpu_vendor;
|
|
|
|
// unsigned int __cpu_type;
|
|
|
|
// unsigned int __cpu_subtype;
|
|
|
|
// unsigned int __cpu_features[1];
|
|
|
|
llvm::Type *STy = llvm::StructType::get(
|
|
|
|
Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
|
|
|
|
|
|
|
|
// Grab the global __cpu_model.
|
|
|
|
llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
|
|
|
|
|
|
|
|
// Grab the first (0th) element from the field __cpu_features off of the
|
|
|
|
// global in the struct STy.
|
|
|
|
Value *Idxs[] = {
|
|
|
|
ConstantInt::get(Int32Ty, 0),
|
|
|
|
ConstantInt::get(Int32Ty, 3),
|
|
|
|
ConstantInt::get(Int32Ty, 0)
|
|
|
|
};
|
|
|
|
Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
|
|
|
|
CharUnits::fromQuantity(4));
|
2015-06-30 05:00:05 +08:00
|
|
|
|
|
|
|
// Check the value of the bit corresponding to the feature requested.
|
|
|
|
Value *Bitset = Builder.CreateAnd(
|
|
|
|
Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
|
|
|
|
return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
|
|
|
|
}
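// Worked example (illustrative): __builtin_cpu_supports("avx2") maps to the
// AVX2 enumerator above (value 10), so the generated code is effectively
//   (__cpu_model.__cpu_features[0] & (1u << 10)) != 0
// evaluated against the runtime-initialized compiler-rt/libgcc global.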
|
2014-02-22 07:08:53 +08:00
|
|
|
case X86::BI_mm_prefetch: {
|
2015-09-08 16:05:57 +08:00
|
|
|
Value *Address = Ops[0];
|
2014-02-22 07:08:53 +08:00
|
|
|
Value *RW = ConstantInt::get(Int32Ty, 0);
|
2015-09-08 16:05:57 +08:00
|
|
|
Value *Locality = Ops[1];
|
2014-02-22 07:08:53 +08:00
|
|
|
Value *Data = ConstantInt::get(Int32Ty, 1);
|
|
|
|
Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
|
2015-05-19 06:14:03 +08:00
|
|
|
return Builder.CreateCall(F, {Address, RW, Locality, Data});
|
2014-02-22 07:08:53 +08:00
|
|
|
}
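// Illustrative note: only the locality hint is taken from the user; the
// read/write and cache-type operands of llvm.prefetch are fixed to 0 (read)
// and 1 (data). For example _mm_prefetch(p, _MM_HINT_T0) becomes roughly
//   call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)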
|
2015-08-27 05:17:12 +08:00
|
|
|
case X86::BI__builtin_ia32_undef128:
|
|
|
|
case X86::BI__builtin_ia32_undef256:
|
|
|
|
case X86::BI__builtin_ia32_undef512:
|
|
|
|
return UndefValue::get(ConvertType(E->getType()));
|
2010-10-09 16:47:25 +08:00
|
|
|
case X86::BI__builtin_ia32_vec_init_v8qi:
|
|
|
|
case X86::BI__builtin_ia32_vec_init_v4hi:
|
|
|
|
case X86::BI__builtin_ia32_vec_init_v2si:
|
|
|
|
return Builder.CreateBitCast(BuildVector(Ops),
|
2011-02-08 16:22:06 +08:00
|
|
|
llvm::Type::getX86_MMXTy(getLLVMContext()));
|
2010-10-10 11:19:11 +08:00
|
|
|
case X86::BI__builtin_ia32_vec_ext_v2si:
|
|
|
|
return Builder.CreateExtractElement(Ops[0],
|
|
|
|
llvm::ConstantInt::get(Ops[1]->getType(), 0));
|
2008-04-14 12:49:57 +08:00
|
|
|
case X86::BI__builtin_ia32_ldmxcsr: {
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
|
2008-04-14 12:49:57 +08:00
|
|
|
Builder.CreateStore(Ops[0], Tmp);
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
|
2008-04-14 12:49:57 +08:00
|
|
|
}
|
|
|
|
case X86::BI__builtin_ia32_stmxcsr: {
|
2015-09-08 16:05:57 +08:00
|
|
|
Address Tmp = CreateMemTemp(E->getType());
|
2011-08-18 05:04:19 +08:00
|
|
|
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
|
2008-04-14 12:49:57 +08:00
|
|
|
return Builder.CreateLoad(Tmp, "stmxcsr");
|
|
|
|
}
|
2015-10-13 20:29:35 +08:00
|
|
|
case X86::BI__builtin_ia32_xsave:
|
|
|
|
case X86::BI__builtin_ia32_xsave64:
|
|
|
|
case X86::BI__builtin_ia32_xrstor:
|
|
|
|
case X86::BI__builtin_ia32_xrstor64:
|
|
|
|
case X86::BI__builtin_ia32_xsaveopt:
|
|
|
|
case X86::BI__builtin_ia32_xsaveopt64:
|
|
|
|
case X86::BI__builtin_ia32_xrstors:
|
|
|
|
case X86::BI__builtin_ia32_xrstors64:
|
|
|
|
case X86::BI__builtin_ia32_xsavec:
|
|
|
|
case X86::BI__builtin_ia32_xsavec64:
|
|
|
|
case X86::BI__builtin_ia32_xsaves:
|
|
|
|
case X86::BI__builtin_ia32_xsaves64: {
|
|
|
|
Intrinsic::ID ID;
|
|
|
|
#define INTRINSIC_X86_XSAVE_ID(NAME) \
|
|
|
|
case X86::BI__builtin_ia32_##NAME: \
|
|
|
|
ID = Intrinsic::x86_##NAME; \
|
|
|
|
break
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unsupported intrinsic!");
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsave);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsave64);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xrstor);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xrstor64);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsaveopt);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsaveopt64);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xrstors);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xrstors64);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsavec);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsavec64);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsaves);
|
|
|
|
INTRINSIC_X86_XSAVE_ID(xsaves64);
|
|
|
|
}
|
|
|
|
#undef INTRINSIC_X86_XSAVE_ID
|
|
|
|
Value *Mhi = Builder.CreateTrunc(
|
|
|
|
Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
|
|
|
|
Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
|
|
|
|
Ops[1] = Mhi;
|
|
|
|
Ops.push_back(Mlo);
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
|
|
|
|
}
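// Worked example (illustrative): the xsave-family instructions consume the
// 64-bit feature mask in EDX:EAX, so the single mask operand is split above.
// For a mask of 0x0000000700000003, Mhi (the second operand) is 0x7 and Mlo
// (appended last) is 0x3.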
|
2008-04-14 12:49:57 +08:00
|
|
|
case X86::BI__builtin_ia32_storehps:
|
|
|
|
case X86::BI__builtin_ia32_storelps: {
|
2010-06-27 15:15:29 +08:00
|
|
|
llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
|
|
|
|
llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-04-14 12:49:57 +08:00
|
|
|
// Cast the value to v2i64.
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-04-14 12:49:57 +08:00
|
|
|
// Extract element 0 (storelps) or 1 (storehps).
|
|
|
|
unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
|
2014-05-31 08:22:12 +08:00
|
|
|
llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
|
2008-04-14 12:49:57 +08:00
|
|
|
Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
|
|
|
|
|
|
|
|
// Cast the pointer to i64* and store the extracted element.
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
|
2015-09-08 16:05:57 +08:00
|
|
|
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
|
2008-04-14 12:49:57 +08:00
|
|
|
}
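// Illustrative note: the vector is reinterpreted as <2 x i64>; storelps then
// stores element 0 (the low 64 bits) through the pointer and storehps stores
// element 1 (the high 64 bits), i.e. the same effect as
//   memcpy(p, (char *)&v + (is_storelps ? 0 : 8), 8);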
|
2015-02-17 14:37:58 +08:00
|
|
|
case X86::BI__builtin_ia32_palignr128:
|
2011-12-19 15:03:25 +08:00
|
|
|
case X86::BI__builtin_ia32_palignr256: {
|
2015-02-17 14:37:58 +08:00
|
|
|
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
|
2011-12-19 15:03:25 +08:00
|
|
|
|
2015-02-17 14:37:58 +08:00
|
|
|
unsigned NumElts =
|
|
|
|
cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
|
|
|
|
assert(NumElts % 16 == 0);
|
|
|
|
unsigned NumLanes = NumElts / 16;
|
|
|
|
unsigned NumLaneElts = NumElts / NumLanes;
|
|
|
|
|
|
|
|
// If palignr is shifting the pair of vectors more than the size of two
|
|
|
|
// lanes, emit zero.
|
|
|
|
if (ShiftVal >= (2 * NumLaneElts))
|
|
|
|
return llvm::Constant::getNullValue(ConvertType(E->getType()));
|
|
|
|
|
|
|
|
// If palignr is shifting the pair of input vectors more than one lane,
|
2015-02-17 15:18:01 +08:00
|
|
|
// but less than two lanes, convert to shifting in zeroes.
|
|
|
|
if (ShiftVal > NumLaneElts) {
|
|
|
|
ShiftVal -= NumLaneElts;
|
2015-07-20 23:31:17 +08:00
|
|
|
Ops[1] = Ops[0];
|
2015-02-17 15:18:01 +08:00
|
|
|
Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
|
|
|
|
}
|
2011-12-19 15:03:25 +08:00
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
uint32_t Indices[32];
|
2015-02-17 15:18:01 +08:00
|
|
|
// 256-bit palignr operates on 128-bit lanes, so build the shuffle mask one
|
|
|
|
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
|
|
|
|
for (unsigned i = 0; i != NumLaneElts; ++i) {
|
|
|
|
unsigned Idx = ShiftVal + i;
|
|
|
|
if (Idx >= NumLaneElts)
|
|
|
|
Idx += NumElts - NumLaneElts; // End of lane, switch operand.
|
2015-07-28 23:40:11 +08:00
|
|
|
Indices[l + i] = Idx + l;
|
2015-02-17 15:18:01 +08:00
|
|
|
}
|
2011-12-19 15:03:25 +08:00
|
|
|
}
|
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
|
|
|
|
makeArrayRef(Indices, NumElts));
|
2015-02-17 15:18:01 +08:00
|
|
|
return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
|
2011-12-19 15:03:25 +08:00
|
|
|
}
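// Worked example (illustrative): 128-bit palignr with ShiftVal == 4 has
// NumElts == NumLaneElts == 16, so the indices are simply 4..19; 4..15 pick
// bytes of Ops[1] and 16..19 spill over into Ops[0], matching PALIGNR's
// concatenate-then-byte-shift semantics. In the 256-bit case the
// "Idx += NumElts - NumLaneElts" adjustment produces the same spill-over
// independently within each 128-bit lane.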
|
2015-02-16 08:42:49 +08:00
|
|
|
case X86::BI__builtin_ia32_pslldqi256: {
|
|
|
|
// Shift value is in bits so divide by 8.
|
|
|
|
unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
|
|
|
|
|
|
|
|
// If pslldq is shifting the vector more than 15 bytes, emit zero.
|
|
|
|
if (shiftVal >= 16)
|
|
|
|
return llvm::Constant::getNullValue(ConvertType(E->getType()));
|
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
uint32_t Indices[32];
|
2015-02-16 08:42:49 +08:00
|
|
|
// 256-bit pslldq operates on 128-bit lanes, so build the shuffle mask one
|
|
|
|
for (unsigned l = 0; l != 32; l += 16) {
|
|
|
|
for (unsigned i = 0; i != 16; ++i) {
|
|
|
|
unsigned Idx = 32 + i - shiftVal;
|
|
|
|
if (Idx < 32) Idx -= 16; // end of lane, switch operand.
|
2015-07-28 23:40:11 +08:00
|
|
|
Indices[l + i] = Idx + l;
|
2015-02-16 08:42:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
|
|
|
|
Value *Zero = llvm::Constant::getNullValue(VecTy);
|
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
|
2015-02-16 08:42:49 +08:00
|
|
|
SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
|
|
|
|
llvm::Type *ResultType = ConvertType(E->getType());
|
|
|
|
return Builder.CreateBitCast(SV, ResultType, "cast");
|
|
|
|
}
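// Worked example (illustrative): an immediate of 24 bits gives shiftVal == 3;
// within each 128-bit lane, result byte i is then zero for i < 3 and lane
// byte i - 3 otherwise, which is exactly what the (Zero, Ops[0]) shuffle mask
// built above encodes.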
|
|
|
|
case X86::BI__builtin_ia32_psrldqi256: {
|
|
|
|
// Shift value is in bits so divide by 8.
|
|
|
|
unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
|
|
|
|
|
|
|
|
// If psrldq is shifting the vector more than 15 bytes, emit zero.
|
|
|
|
if (shiftVal >= 16)
|
|
|
|
return llvm::Constant::getNullValue(ConvertType(E->getType()));
|
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
uint32_t Indices[32];
|
2015-02-16 08:42:49 +08:00
|
|
|
// 256-bit psrldq operates on 128-bit lanes, so build the shuffle mask one
|
|
|
|
for (unsigned l = 0; l != 32; l += 16) {
|
|
|
|
for (unsigned i = 0; i != 16; ++i) {
|
|
|
|
unsigned Idx = i + shiftVal;
|
|
|
|
if (Idx >= 16) Idx += 16; // end of lane, switch operand.
|
2015-07-28 23:40:11 +08:00
|
|
|
Indices[l + i] = Idx + l;
|
2015-02-16 08:42:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
|
|
|
|
Value *Zero = llvm::Constant::getNullValue(VecTy);
|
|
|
|
|
2015-07-28 23:40:11 +08:00
|
|
|
Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
|
2015-02-16 08:42:49 +08:00
|
|
|
SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
|
|
|
|
llvm::Type *ResultType = ConvertType(E->getType());
|
|
|
|
return Builder.CreateBitCast(SV, ResultType, "cast");
|
|
|
|
}
|
2011-05-04 10:40:38 +08:00
|
|
|
case X86::BI__builtin_ia32_movntps:
|
2012-05-07 14:25:45 +08:00
|
|
|
case X86::BI__builtin_ia32_movntps256:
|
2011-05-04 10:40:38 +08:00
|
|
|
case X86::BI__builtin_ia32_movntpd:
|
2012-05-07 14:25:45 +08:00
|
|
|
case X86::BI__builtin_ia32_movntpd256:
|
2011-05-04 10:40:38 +08:00
|
|
|
case X86::BI__builtin_ia32_movntdq:
|
2012-05-07 14:25:45 +08:00
|
|
|
case X86::BI__builtin_ia32_movntdq256:
|
2013-09-24 07:38:39 +08:00
|
|
|
case X86::BI__builtin_ia32_movnti:
|
|
|
|
case X86::BI__builtin_ia32_movnti64: {
|
2014-12-10 02:39:32 +08:00
|
|
|
llvm::MDNode *Node = llvm::MDNode::get(
|
|
|
|
getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
|
2011-05-04 10:40:38 +08:00
|
|
|
|
|
|
|
// Convert the type of the pointer to a pointer to the stored type.
|
|
|
|
Value *BC = Builder.CreateBitCast(Ops[0],
|
|
|
|
llvm::PointerType::getUnqual(Ops[1]->getType()),
|
|
|
|
"cast");
|
2015-09-08 16:05:57 +08:00
|
|
|
StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
|
2011-05-04 10:40:38 +08:00
|
|
|
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
|
2013-09-24 07:38:39 +08:00
|
|
|
|
|
|
|
// If the operand is an integer, we can't assume alignment. Otherwise,
|
|
|
|
// assume natural alignment.
|
|
|
|
QualType ArgTy = E->getArg(1)->getType();
|
|
|
|
unsigned Align;
|
|
|
|
if (ArgTy->isIntegerType())
|
|
|
|
Align = 1;
|
|
|
|
else
|
|
|
|
Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
|
|
|
|
SI->setAlignment(Align);
|
2011-05-04 10:40:38 +08:00
|
|
|
return SI;
|
|
|
|
}
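// Illustrative note: the !nontemporal metadata, e.g.
//   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0
//   !0 = !{i32 1}
// is what lets the backend select a streaming store (MOVNTPS and friends)
// instead of an ordinary cached store.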
|
2011-04-15 23:07:13 +08:00
|
|
|
// 3DNow!
|
|
|
|
case X86::BI__builtin_ia32_pswapdsf:
|
|
|
|
case X86::BI__builtin_ia32_pswapdsi: {
|
2012-02-20 15:35:45 +08:00
|
|
|
llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
|
2015-02-17 05:30:08 +08:00
|
|
|
llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
|
|
|
|
return Builder.CreateCall(F, Ops, "pswapd");
|
2011-04-15 23:07:13 +08:00
|
|
|
}
|
2012-07-12 17:33:03 +08:00
|
|
|
case X86::BI__builtin_ia32_rdrand16_step:
|
|
|
|
case X86::BI__builtin_ia32_rdrand32_step:
|
2013-03-29 13:17:55 +08:00
|
|
|
case X86::BI__builtin_ia32_rdrand64_step:
|
|
|
|
case X86::BI__builtin_ia32_rdseed16_step:
|
|
|
|
case X86::BI__builtin_ia32_rdseed32_step:
|
|
|
|
case X86::BI__builtin_ia32_rdseed64_step: {
|
2012-07-12 17:33:03 +08:00
|
|
|
Intrinsic::ID ID;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unsupported intrinsic!");
|
|
|
|
case X86::BI__builtin_ia32_rdrand16_step:
|
|
|
|
ID = Intrinsic::x86_rdrand_16;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_rdrand32_step:
|
|
|
|
ID = Intrinsic::x86_rdrand_32;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_rdrand64_step:
|
|
|
|
ID = Intrinsic::x86_rdrand_64;
|
|
|
|
break;
|
2013-03-29 13:17:55 +08:00
|
|
|
case X86::BI__builtin_ia32_rdseed16_step:
|
|
|
|
ID = Intrinsic::x86_rdseed_16;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_rdseed32_step:
|
|
|
|
ID = Intrinsic::x86_rdseed_32;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_rdseed64_step:
|
|
|
|
ID = Intrinsic::x86_rdseed_64;
|
|
|
|
break;
|
2012-07-12 17:33:03 +08:00
|
|
|
}
|
|
|
|
|
2015-07-15 01:27:39 +08:00
|
|
|
Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
|
2015-09-08 16:05:57 +08:00
|
|
|
Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
|
|
|
|
Ops[0]);
|
2012-07-12 17:33:03 +08:00
|
|
|
return Builder.CreateExtractValue(Call, 1);
|
|
|
|
}
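// Illustrative note: the rdrand/rdseed intrinsics return a {value, i32 flag}
// pair; the value is stored through the user's pointer and the flag (1 on
// success) is returned, so the usual calling pattern is a retry loop such as
//   while (!__builtin_ia32_rdrand32_step(&r))
//     ;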
|
2014-12-27 14:59:57 +08:00
|
|
|
// SSE comparison intrinsics
|
|
|
|
case X86::BI__builtin_ia32_cmpeqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordps:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpordps:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpless:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordss:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpnless:
|
|
|
|
case X86::BI__builtin_ia32_cmpordss:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmplepd:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlepd:
|
|
|
|
case X86::BI__builtin_ia32_cmpordpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpltsd:
|
|
|
|
case X86::BI__builtin_ia32_cmplesd:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlesd:
|
|
|
|
case X86::BI__builtin_ia32_cmpordsd:
|
|
|
|
// These exist so that the builtin that takes an immediate can be bounds
|
|
|
|
// checked by clang to avoid passing bad immediates to the backend. Since
|
|
|
|
// AVX has a larger immediate than SSE, we would need separate builtins to
|
|
|
|
// do the different bounds checking. Rather than create a clang-specific,
|
|
|
|
// SSE-only builtin, this implements eight separate builtins to match the
|
|
|
|
// gcc implementation.
|
|
|
|
|
|
|
|
// Choose the immediate.
|
|
|
|
unsigned Imm;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unsupported intrinsic!");
|
|
|
|
case X86::BI__builtin_ia32_cmpeqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpeqsd:
|
|
|
|
Imm = 0;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpltsd:
|
|
|
|
Imm = 1;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpless:
|
|
|
|
case X86::BI__builtin_ia32_cmplepd:
|
|
|
|
case X86::BI__builtin_ia32_cmplesd:
|
|
|
|
Imm = 2;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpunordps:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordss:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordsd:
|
|
|
|
Imm = 3;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpneqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqsd:
|
|
|
|
Imm = 4;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpnltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltsd:
|
|
|
|
Imm = 5;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpnleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnless:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlepd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlesd:
|
|
|
|
Imm = 6;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpordps:
|
|
|
|
case X86::BI__builtin_ia32_cmpordss:
|
|
|
|
case X86::BI__builtin_ia32_cmpordpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpordsd:
|
|
|
|
Imm = 7;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Choose the intrinsic ID.
|
|
|
|
const char *name;
|
|
|
|
Intrinsic::ID ID;
|
|
|
|
switch (BuiltinID) {
|
|
|
|
default: llvm_unreachable("Unsupported intrinsic!");
|
|
|
|
case X86::BI__builtin_ia32_cmpeqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordps:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltps:
|
|
|
|
case X86::BI__builtin_ia32_cmpnleps:
|
|
|
|
case X86::BI__builtin_ia32_cmpordps:
|
|
|
|
name = "cmpps";
|
|
|
|
ID = Intrinsic::x86_sse_cmp_ps;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpeqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpless:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordss:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqss:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltss:
|
|
|
|
case X86::BI__builtin_ia32_cmpnless:
|
|
|
|
case X86::BI__builtin_ia32_cmpordss:
|
|
|
|
name = "cmpss";
|
|
|
|
ID = Intrinsic::x86_sse_cmp_ss;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpeqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmplepd:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltpd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlepd:
|
|
|
|
case X86::BI__builtin_ia32_cmpordpd:
|
|
|
|
name = "cmppd";
|
|
|
|
ID = Intrinsic::x86_sse2_cmp_pd;
|
|
|
|
break;
|
|
|
|
case X86::BI__builtin_ia32_cmpeqsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpltsd:
|
|
|
|
case X86::BI__builtin_ia32_cmplesd:
|
|
|
|
case X86::BI__builtin_ia32_cmpunordsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpneqsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnltsd:
|
|
|
|
case X86::BI__builtin_ia32_cmpnlesd:
|
|
|
|
case X86::BI__builtin_ia32_cmpordsd:
|
|
|
|
name = "cmpsd";
|
|
|
|
ID = Intrinsic::x86_sse2_cmp_sd;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
|
|
|
|
llvm::Function *F = CGM.getIntrinsic(ID);
|
|
|
|
return Builder.CreateCall(F, Ops, name);
|
2007-12-10 07:17:02 +08:00
|
|
|
}
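// Worked example (illustrative): __builtin_ia32_cmpltps(a, b) selects
// Imm == 1 and Intrinsic::x86_sse_cmp_ps, so it is emitted as
//   call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a, <4 x float> %b, i8 1)
// i.e. the same CMPPS predicate encoding the immediate-taking builtin uses.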
|
|
|
|
}
|
|
|
|
|
2011-12-13 05:14:55 +08:00
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
2007-12-13 15:34:23 +08:00
|
|
|
const CallExpr *E) {
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<Value*, 4> Ops;
|
2010-04-14 11:54:58 +08:00
|
|
|
|
|
|
|
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(i)));
|
|
|
|
|
|
|
|
Intrinsic::ID ID = Intrinsic::not_intrinsic;
|
|
|
|
|
|
|
|
switch (BuiltinID) {
|
2014-05-21 13:09:00 +08:00
|
|
|
default: return nullptr;
|
2010-04-14 11:54:58 +08:00
|
|
|
|
2015-09-01 07:55:19 +08:00
|
|
|
// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
|
|
|
|
// call __builtin_readcyclecounter.
|
|
|
|
case PPC::BI__builtin_ppc_get_timebase:
|
|
|
|
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
|
|
|
|
|
2010-06-19 17:47:18 +08:00
|
|
|
// vec_ld, vec_lvsl, vec_lvsr
|
|
|
|
case PPC::BI__builtin_altivec_lvx:
|
|
|
|
case PPC::BI__builtin_altivec_lvxl:
|
|
|
|
case PPC::BI__builtin_altivec_lvebx:
|
|
|
|
case PPC::BI__builtin_altivec_lvehx:
|
|
|
|
case PPC::BI__builtin_altivec_lvewx:
|
|
|
|
case PPC::BI__builtin_altivec_lvsl:
|
|
|
|
case PPC::BI__builtin_altivec_lvsr:
|
2014-11-12 12:19:56 +08:00
|
|
|
case PPC::BI__builtin_vsx_lxvd2x:
|
|
|
|
case PPC::BI__builtin_vsx_lxvw4x:
|
2010-06-19 17:47:18 +08:00
|
|
|
{
|
2011-02-08 16:22:06 +08:00
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
|
2010-06-19 17:47:18 +08:00
|
|
|
|
2011-09-28 05:06:10 +08:00
|
|
|
Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
|
2010-06-19 17:47:18 +08:00
|
|
|
Ops.pop_back();
|
|
|
|
|
|
|
|
switch (BuiltinID) {
|
2011-09-23 13:06:16 +08:00
|
|
|
default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
|
2010-06-19 17:47:18 +08:00
|
|
|
case PPC::BI__builtin_altivec_lvx:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvxl:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvxl;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvebx:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvebx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvehx:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvehx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvewx:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvewx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvsl:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvsl;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_lvsr:
|
|
|
|
ID = Intrinsic::ppc_altivec_lvsr;
|
|
|
|
break;
|
2014-11-12 12:19:56 +08:00
|
|
|
case PPC::BI__builtin_vsx_lxvd2x:
|
|
|
|
ID = Intrinsic::ppc_vsx_lxvd2x;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_vsx_lxvw4x:
|
|
|
|
ID = Intrinsic::ppc_vsx_lxvw4x;
|
|
|
|
break;
|
2010-06-19 17:47:18 +08:00
|
|
|
}
|
|
|
|
llvm::Function *F = CGM.getIntrinsic(ID);
|
2011-07-15 16:37:34 +08:00
|
|
|
return Builder.CreateCall(F, Ops, "");
|
2010-06-19 17:47:18 +08:00
|
|
|
}
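// Illustrative note: the AltiVec load builtins are shaped (offset, base), so
// the code above folds them into one i8* address before the call; e.g.
// vec_ld(16, p) becomes roughly
//   %addr = getelementptr i8, i8* %p, i32 16
//   %v = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %addr)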
|
|
|
|
|
2010-04-14 11:54:58 +08:00
|
|
|
// vec_st
|
|
|
|
case PPC::BI__builtin_altivec_stvx:
|
|
|
|
case PPC::BI__builtin_altivec_stvxl:
|
|
|
|
case PPC::BI__builtin_altivec_stvebx:
|
|
|
|
case PPC::BI__builtin_altivec_stvehx:
|
|
|
|
case PPC::BI__builtin_altivec_stvewx:
|
2014-11-12 12:19:56 +08:00
|
|
|
case PPC::BI__builtin_vsx_stxvd2x:
|
|
|
|
case PPC::BI__builtin_vsx_stxvw4x:
|
2010-04-14 11:54:58 +08:00
|
|
|
{
|
2011-02-08 16:22:06 +08:00
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
|
2011-09-28 05:06:10 +08:00
|
|
|
Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
|
2010-04-14 11:54:58 +08:00
|
|
|
Ops.pop_back();
|
|
|
|
|
|
|
|
switch (BuiltinID) {
|
2011-09-23 13:06:16 +08:00
|
|
|
default: llvm_unreachable("Unsupported st intrinsic!");
|
2010-04-14 11:54:58 +08:00
|
|
|
case PPC::BI__builtin_altivec_stvx:
|
|
|
|
ID = Intrinsic::ppc_altivec_stvx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_stvxl:
|
|
|
|
ID = Intrinsic::ppc_altivec_stvxl;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_stvebx:
|
|
|
|
ID = Intrinsic::ppc_altivec_stvebx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_stvehx:
|
|
|
|
ID = Intrinsic::ppc_altivec_stvehx;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_altivec_stvewx:
|
|
|
|
ID = Intrinsic::ppc_altivec_stvewx;
|
|
|
|
break;
|
2014-11-12 12:19:56 +08:00
|
|
|
case PPC::BI__builtin_vsx_stxvd2x:
|
|
|
|
ID = Intrinsic::ppc_vsx_stxvd2x;
|
|
|
|
break;
|
|
|
|
case PPC::BI__builtin_vsx_stxvw4x:
|
|
|
|
ID = Intrinsic::ppc_vsx_stxvw4x;
|
|
|
|
break;
|
2010-04-14 11:54:58 +08:00
|
|
|
}
|
|
|
|
llvm::Function *F = CGM.getIntrinsic(ID);
|
2011-07-15 16:37:34 +08:00
|
|
|
return Builder.CreateCall(F, Ops, "");
|
2010-04-14 11:54:58 +08:00
|
|
|
}
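// Illustrative note: the store variants are shaped (value, offset, base), so
// here the address is folded into Ops[1] instead; vec_st(v, 0, p) ends up as
// roughly a call to @llvm.ppc.altivec.stvx(<4 x i32> %v, i8* %p).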

  // Square root
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    ID = Intrinsic::sqrt;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }

  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
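
  // The trailing i1 'false' operand above is llvm.ctlz's is_zero_undef flag;
  // passing false keeps a zero input well defined (each element yields its
  // bit width) rather than producing an undefined result.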

  // Copy sign
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }

  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Intrinsic::nearbyint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, X);
  }

  // FMA variations
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    switch (BuiltinID) {
    case PPC::BI__builtin_vsx_xvmaddadp:
    case PPC::BI__builtin_vsx_xvmaddasp:
      return Builder.CreateCall(F, {X, Y, Z});
    case PPC::BI__builtin_vsx_xvnmaddadp:
    case PPC::BI__builtin_vsx_xvnmaddasp:
      return Builder.CreateFSub(Zero,
                                Builder.CreateCall(F, {X, Y, Z}), "sub");
    case PPC::BI__builtin_vsx_xvmsubadp:
    case PPC::BI__builtin_vsx_xvmsubasp:
      return Builder.CreateCall(F,
                                {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
    case PPC::BI__builtin_vsx_xvnmsubadp:
    case PPC::BI__builtin_vsx_xvnmsubasp:
      Value *FsubRes =
        Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
      return Builder.CreateFSub(Zero, FsubRes, "sub");
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }
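
  // The four groupings above compute, respectively:
  //   xvmadd*  ->  fma(X, Y, Z)
  //   xvnmadd* -> -fma(X, Y, Z)
  //   xvmsub*  ->  fma(X, Y, -Z)
  //   xvnmsub* -> -fma(X, Y, -Z)
  // with each negation expressed as an fsub from negative zero, the IR idiom
  // for fneg used here.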
  }
}

// Emit an intrinsic that has 1 float or double.
static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 3 float or double operands.
static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
                                   const CallExpr *E,
                                   unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}
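
// These helpers are shared by the per-target emitters below; a case can
// simply forward to them, e.g.
//   return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
// and the intrinsic is overloaded on the first operand's float/double type.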

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (BuiltinID) {
  case AMDGPU::BI__builtin_amdgpu_div_scale:
  case AMDGPU::BI__builtin_amdgpu_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
    Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
                                           X->getType());

    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType
      = FlagOutPtr.getPointer()->getType()->getPointerElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    Builder.CreateStore(FlagExt, FlagOutPtr);
    return Result;
  }
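
  // At the source level this corresponds to something like
  //   bool flag;
  //   double r = __builtin_amdgpu_div_scale(x, y, z, &flag);
  // (types illustrative): the IR intrinsic returns a {value, i1} pair, which
  // is split apart, the i1 zero-extended, and stored through the pointer
  // passed as the last argument.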
  case AMDGPU::BI__builtin_amdgpu_div_fmas:
  case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
                                      Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }
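
  // The final operand of the div_fmas intrinsic is an i1, so the integer
  // value the builtin receives is collapsed to a bool with an is-not-null
  // compare before the call.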
  case AMDGPU::BI__builtin_amdgpu_div_fixup:
  case AMDGPU::BI__builtin_amdgpu_div_fixupf:
    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
  case AMDGPU::BI__builtin_amdgpu_trig_preop:
  case AMDGPU::BI__builtin_amdgpu_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
  case AMDGPU::BI__builtin_amdgpu_rcp:
  case AMDGPU::BI__builtin_amdgpu_rcpf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
  case AMDGPU::BI__builtin_amdgpu_rsq:
  case AMDGPU::BI__builtin_amdgpu_rsqf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
  case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
  case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
  case AMDGPU::BI__builtin_amdgpu_ldexp:
  case AMDGPU::BI__builtin_amdgpu_ldexpf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
  case AMDGPU::BI__builtin_amdgpu_class:
  case AMDGPU::BI__builtin_amdgpu_classf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
  default:
    return nullptr;
  }
}

/// Handle a SystemZ function in which the final argument is a pointer
/// to an int that receives the post-instruction CC value. At the LLVM level
/// this is represented as a function that returns a {result, cc} pair.
static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
                                         unsigned IntrinsicID,
                                         const CallExpr *E) {
  unsigned NumArgs = E->getNumArgs() - 1;
  SmallVector<Value *, 8> Args(NumArgs);
  for (unsigned I = 0; I < NumArgs; ++I)
    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
  Value *Call = CGF.Builder.CreateCall(F, Args);
  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  CGF.Builder.CreateStore(CC, CCPtr);
  return CGF.Builder.CreateExtractValue(Call, 0);
}
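
// Used by the INTRINSIC_WITH_CC cases further below: for example, a call such
// as __builtin_s390_vceqbs(a, b, &cc) is lowered to llvm.s390.vceqbs, with
// element 1 of the returned pair stored to *cc and element 0 returned as the
// builtin's result.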

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall(F, {TDB, Control});
  }
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall(F, {Data, Address});
  }

  // Vector builtins. Note that most vector builtins are mapped automatically
  // to target-specific LLVM intrinsics. The ones handled specially here can
  // be represented via standard LLVM IR, which is preferable to enable common
  // LLVM optimizations.

  case SystemZ::BI__builtin_s390_vpopctb:
  case SystemZ::BI__builtin_s390_vpopcth:
  case SystemZ::BI__builtin_s390_vpopctf:
  case SystemZ::BI__builtin_s390_vpopctg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }

  case SystemZ::BI__builtin_s390_vclzb:
  case SystemZ::BI__builtin_s390_vclzh:
  case SystemZ::BI__builtin_s390_vclzf:
  case SystemZ::BI__builtin_s390_vclzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vctzb:
  case SystemZ::BI__builtin_s390_vctzh:
  case SystemZ::BI__builtin_s390_vctzf:
  case SystemZ::BI__builtin_s390_vctzg: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }

  case SystemZ::BI__builtin_s390_vfsqdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vfmadb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Z});
  }
  case SystemZ::BI__builtin_s390_vfmsdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  }
  case SystemZ::BI__builtin_s390_vflpdb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  case SystemZ::BI__builtin_s390_vflndb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
  }
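
  // Note that vfmsdb (multiply-and-subtract) is expressed as fma with a
  // negated third operand, and vflpdb/vflndb ("load positive"/"load
  // negative") are simply fabs and a negated fabs.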

  case SystemZ::BI__builtin_s390_vfidb: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    // Constant-fold the M4 and M5 mask arguments.
    llvm::APSInt M4, M5;
    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
    (void)IsConstM4; (void)IsConstM5;
    // Check whether this instance of vfidb can be represented via an LLVM
    // standard intrinsic. We only support some combinations of M4 and M5.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (M4.getZExtValue()) {
    default: break;
    case 0:  // IEEE-inexact exception allowed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::rint; break;
      }
      break;
    case 4:  // IEEE-inexact exception suppressed
      switch (M5.getZExtValue()) {
      default: break;
      case 0: ID = Intrinsic::nearbyint; break;
      case 1: ID = Intrinsic::round; break;
      case 5: ID = Intrinsic::trunc; break;
      case 6: ID = Intrinsic::ceil; break;
      case 7: ID = Intrinsic::floor; break;
      }
      break;
    }
    if (ID != Intrinsic::not_intrinsic) {
      Function *F = CGM.getIntrinsic(ID, ResultType);
      return Builder.CreateCall(F, X);
    }
    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
    return Builder.CreateCall(F, {X, M4Value, M5Value});
  }
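
  // For example, __builtin_s390_vfidb(v, 4, 5) (inexact suppressed, round
  // toward zero) becomes a call to llvm.trunc.v2f64, while any M4/M5
  // combination not listed above falls back to the target-specific
  // llvm.s390.vfidb intrinsic with the masks passed through.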

  // Vector intrinsics that output the post-instruction CC value.

#define INTRINSIC_WITH_CC(NAME) \
  case SystemZ::BI__builtin_##NAME: \
    return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)

  INTRINSIC_WITH_CC(s390_vpkshs);
  INTRINSIC_WITH_CC(s390_vpksfs);
  INTRINSIC_WITH_CC(s390_vpksgs);

  INTRINSIC_WITH_CC(s390_vpklshs);
  INTRINSIC_WITH_CC(s390_vpklsfs);
  INTRINSIC_WITH_CC(s390_vpklsgs);

  INTRINSIC_WITH_CC(s390_vceqbs);
  INTRINSIC_WITH_CC(s390_vceqhs);
  INTRINSIC_WITH_CC(s390_vceqfs);
  INTRINSIC_WITH_CC(s390_vceqgs);

  INTRINSIC_WITH_CC(s390_vchbs);
  INTRINSIC_WITH_CC(s390_vchhs);
  INTRINSIC_WITH_CC(s390_vchfs);
  INTRINSIC_WITH_CC(s390_vchgs);

  INTRINSIC_WITH_CC(s390_vchlbs);
  INTRINSIC_WITH_CC(s390_vchlhs);
  INTRINSIC_WITH_CC(s390_vchlfs);
  INTRINSIC_WITH_CC(s390_vchlgs);

  INTRINSIC_WITH_CC(s390_vfaebs);
  INTRINSIC_WITH_CC(s390_vfaehs);
  INTRINSIC_WITH_CC(s390_vfaefs);

  INTRINSIC_WITH_CC(s390_vfaezbs);
  INTRINSIC_WITH_CC(s390_vfaezhs);
  INTRINSIC_WITH_CC(s390_vfaezfs);

  INTRINSIC_WITH_CC(s390_vfeebs);
  INTRINSIC_WITH_CC(s390_vfeehs);
  INTRINSIC_WITH_CC(s390_vfeefs);

  INTRINSIC_WITH_CC(s390_vfeezbs);
  INTRINSIC_WITH_CC(s390_vfeezhs);
  INTRINSIC_WITH_CC(s390_vfeezfs);

  INTRINSIC_WITH_CC(s390_vfenebs);
  INTRINSIC_WITH_CC(s390_vfenehs);
  INTRINSIC_WITH_CC(s390_vfenefs);

  INTRINSIC_WITH_CC(s390_vfenezbs);
  INTRINSIC_WITH_CC(s390_vfenezhs);
  INTRINSIC_WITH_CC(s390_vfenezfs);

  INTRINSIC_WITH_CC(s390_vistrbs);
  INTRINSIC_WITH_CC(s390_vistrhs);
  INTRINSIC_WITH_CC(s390_vistrfs);

  INTRINSIC_WITH_CC(s390_vstrcbs);
  INTRINSIC_WITH_CC(s390_vstrchs);
  INTRINSIC_WITH_CC(s390_vstrcfs);

  INTRINSIC_WITH_CC(s390_vstrczbs);
  INTRINSIC_WITH_CC(s390_vstrczhs);
  INTRINSIC_WITH_CC(s390_vstrczfs);

  INTRINSIC_WITH_CC(s390_vfcedbs);
  INTRINSIC_WITH_CC(s390_vfchdbs);
  INTRINSIC_WITH_CC(s390_vfchedbs);

  INTRINSIC_WITH_CC(s390_vftcidb);

#undef INTRINSIC_WITH_CC

  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
                                             const CallExpr *E) {
  switch (BuiltinID) {
  case NVPTX::BI__nvvm_atom_add_gen_i:
  case NVPTX::BI__nvvm_atom_add_gen_l:
  case NVPTX::BI__nvvm_atom_add_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);

  case NVPTX::BI__nvvm_atom_sub_gen_i:
  case NVPTX::BI__nvvm_atom_sub_gen_l:
  case NVPTX::BI__nvvm_atom_sub_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);

  case NVPTX::BI__nvvm_atom_and_gen_i:
  case NVPTX::BI__nvvm_atom_and_gen_l:
  case NVPTX::BI__nvvm_atom_and_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);

  case NVPTX::BI__nvvm_atom_or_gen_i:
  case NVPTX::BI__nvvm_atom_or_gen_l:
  case NVPTX::BI__nvvm_atom_or_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);

  case NVPTX::BI__nvvm_atom_xor_gen_i:
  case NVPTX::BI__nvvm_atom_xor_gen_l:
  case NVPTX::BI__nvvm_atom_xor_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);

  case NVPTX::BI__nvvm_atom_xchg_gen_i:
  case NVPTX::BI__nvvm_atom_xchg_gen_l:
  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);

  case NVPTX::BI__nvvm_atom_max_gen_i:
  case NVPTX::BI__nvvm_atom_max_gen_l:
  case NVPTX::BI__nvvm_atom_max_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);

  case NVPTX::BI__nvvm_atom_max_gen_ui:
  case NVPTX::BI__nvvm_atom_max_gen_ul:
  case NVPTX::BI__nvvm_atom_max_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);

  case NVPTX::BI__nvvm_atom_min_gen_i:
  case NVPTX::BI__nvvm_atom_min_gen_l:
  case NVPTX::BI__nvvm_atom_min_gen_ll:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);

  case NVPTX::BI__nvvm_atom_min_gen_ui:
  case NVPTX::BI__nvvm_atom_min_gen_ul:
  case NVPTX::BI__nvvm_atom_min_gen_ull:
    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);

  case NVPTX::BI__nvvm_atom_cas_gen_i:
  case NVPTX::BI__nvvm_atom_cas_gen_l:
  case NVPTX::BI__nvvm_atom_cas_gen_ll:
    // __nvvm_atom_cas_gen_* should return the old value rather than the
    // success flag.
    return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
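
  // Each of the integer atomics above lowers to a single atomicrmw (or
  // cmpxchg) on the generic address space; e.g. __nvvm_atom_add_gen_i(&x, 1)
  // becomes roughly "%old = atomicrmw add i32* %x, i32 1 seq_cst", with the
  // prior value returned. (Sketch only; the exact ordering is whatever
  // MakeBinaryAtomicValue emits.)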

  case NVPTX::BI__nvvm_atom_add_gen_f: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Val = EmitScalarExpr(E->getArg(1));
    // atomicrmw only deals with integer arguments so we need to use
    // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
    Value *FnALAF32 =
        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
    return Builder.CreateCall(FnALAF32, {Ptr, Val});
  }

  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                                   const CallExpr *E) {
  switch (BuiltinID) {
  case WebAssembly::BI__builtin_wasm_page_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_page_size, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_memory_size: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
    return Builder.CreateCall(Callee);
  }
  case WebAssembly::BI__builtin_wasm_resize_memory: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_resize_memory, X->getType());
    return Builder.CreateCall(Callee, X);
  }
  default:
    return nullptr;
  }
}