2013-10-02 05:51:38 +08:00
|
|
|
//===--- CGCall.cpp - Encapsulate calling convention details --------------===//
|
2008-09-09 05:33:45 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// These classes wrap the information about a call or function
|
|
|
|
// definition used to handle ABI compliancy.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "CGCall.h"
|
2010-06-30 00:40:28 +08:00
|
|
|
#include "ABIInfo.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "CGCXXABI.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "CodeGenFunction.h"
|
2008-09-10 08:41:16 +08:00
|
|
|
#include "CodeGenModule.h"
|
2012-02-17 11:33:10 +08:00
|
|
|
#include "TargetInfo.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "clang/AST/Decl.h"
|
2009-04-04 06:48:58 +08:00
|
|
|
#include "clang/AST/DeclCXX.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "clang/AST/DeclObjC.h"
|
2012-12-04 17:13:33 +08:00
|
|
|
#include "clang/Basic/TargetInfo.h"
|
2013-10-31 05:53:58 +08:00
|
|
|
#include "clang/CodeGen/CGFunctionInfo.h"
|
2010-06-16 07:19:56 +08:00
|
|
|
#include "clang/Frontend/CodeGenOptions.h"
|
2013-03-01 06:49:57 +08:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2013-01-02 19:45:17 +08:00
|
|
|
#include "llvm/IR/Attributes.h"
|
2014-03-04 19:02:08 +08:00
|
|
|
#include "llvm/IR/CallSite.h"
|
2013-01-02 19:45:17 +08:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/InlineAsm.h"
|
2014-02-01 08:04:45 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2011-06-16 06:09:18 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
|
|
|
/***/
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) {
|
|
|
|
switch (CC) {
|
|
|
|
default: return llvm::CallingConv::C;
|
|
|
|
case CC_X86StdCall: return llvm::CallingConv::X86_StdCall;
|
|
|
|
case CC_X86FastCall: return llvm::CallingConv::X86_FastCall;
|
2010-05-19 00:57:00 +08:00
|
|
|
case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall;
|
2013-08-30 12:39:01 +08:00
|
|
|
case CC_X86_64Win64: return llvm::CallingConv::X86_64_Win64;
|
|
|
|
case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV;
|
2011-04-15 04:06:49 +08:00
|
|
|
case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS;
|
|
|
|
case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
|
2012-12-25 16:53:55 +08:00
|
|
|
case CC_IntelOclBicc: return llvm::CallingConv::Intel_OCL_BI;
|
2014-10-25 01:42:17 +08:00
|
|
|
// TODO: Add support for __pascal to LLVM.
|
|
|
|
case CC_X86Pascal: return llvm::CallingConv::C;
|
|
|
|
// TODO: Add support for __vectorcall to LLVM.
|
2014-11-01 06:00:51 +08:00
|
|
|
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
|
2015-01-20 19:20:41 +08:00
|
|
|
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
|
|
|
|
case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL;
|
2010-02-06 05:31:56 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-24 15:14:12 +08:00
|
|
|
/// Derives the 'this' type for codegen purposes, i.e. ignoring method
|
|
|
|
/// qualification.
|
|
|
|
/// FIXME: address space qualification?
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) {
|
|
|
|
QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
|
|
|
|
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the canonical formal type of the given C++ method.
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQual<FunctionProtoType> GetFormalType(const CXXMethodDecl *MD) {
|
|
|
|
return MD->getType()->getCanonicalTypeUnqualified()
|
|
|
|
.getAs<FunctionProtoType>();
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the "extra-canonicalized" return type, which discards
|
|
|
|
/// qualifiers on the return type. Codegen doesn't care about them,
|
|
|
|
/// and it makes ABI code a little easier to be able to assume that
|
|
|
|
/// all parameter and return types are top-level unqualified.
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQualType GetReturnType(QualType RetTy) {
|
|
|
|
return RetTy->getCanonicalTypeUnqualified().getUnqualifiedType();
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
2012-07-07 14:41:13 +08:00
|
|
|
/// Arrange the argument and result information for a value of the given
|
|
|
|
/// unprototyped freestanding function type.
|
2010-02-24 15:14:12 +08:00
|
|
|
const CGFunctionInfo &
|
2012-07-07 14:41:13 +08:00
|
|
|
CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
|
2012-02-17 11:33:10 +08:00
|
|
|
// When translating an unprototyped function type, always use a
|
|
|
|
// variadic type.
|
2014-01-26 00:55:45 +08:00
|
|
|
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
|
2014-12-13 07:41:25 +08:00
|
|
|
/*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, None,
|
|
|
|
FTNP->getExtInfo(), RequiredArgs(0));
|
2012-02-17 11:33:10 +08:00
|
|
|
}
|
|
|
|
|
2012-07-07 14:41:13 +08:00
|
|
|
/// Arrange the LLVM function layout for a value of the given function
|
2014-08-14 07:55:54 +08:00
|
|
|
/// type, on top of any implicit parameters already stored.
|
|
|
|
static const CGFunctionInfo &
|
2014-12-13 07:41:25 +08:00
|
|
|
arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
|
2014-08-14 07:55:54 +08:00
|
|
|
SmallVectorImpl<CanQualType> &prefix,
|
|
|
|
CanQual<FunctionProtoType> FTP) {
|
2012-07-07 14:41:13 +08:00
|
|
|
RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy.
|
2015-02-18 00:48:30 +08:00
|
|
|
prefix.append(FTP->param_type_begin(), FTP->param_type_end());
|
2014-01-26 00:55:45 +08:00
|
|
|
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
|
2014-12-13 07:41:25 +08:00
|
|
|
return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
|
|
|
|
/*chainCall=*/false, prefix,
|
2014-08-14 07:55:54 +08:00
|
|
|
FTP->getExtInfo(), required);
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a value of the
|
2012-07-07 14:41:13 +08:00
|
|
|
/// given freestanding function type.
|
2010-02-24 15:14:12 +08:00
|
|
|
const CGFunctionInfo &
|
2012-07-07 14:41:13 +08:00
|
|
|
CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
|
2012-02-17 11:33:10 +08:00
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2014-12-13 07:41:25 +08:00
|
|
|
return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes,
|
|
|
|
FTP);
|
2009-09-12 06:24:53 +08:00
|
|
|
}
|
|
|
|
|
2013-12-19 00:23:37 +08:00
|
|
|
static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
|
2009-09-12 06:24:53 +08:00
|
|
|
// Set the appropriate calling convention for the Function.
|
|
|
|
if (D->hasAttr<StdCallAttr>())
|
2010-02-06 05:31:56 +08:00
|
|
|
return CC_X86StdCall;
|
2009-09-12 06:24:53 +08:00
|
|
|
|
|
|
|
if (D->hasAttr<FastCallAttr>())
|
2010-02-06 05:31:56 +08:00
|
|
|
return CC_X86FastCall;
|
2009-09-12 06:24:53 +08:00
|
|
|
|
2010-05-19 00:57:00 +08:00
|
|
|
if (D->hasAttr<ThisCallAttr>())
|
|
|
|
return CC_X86ThisCall;
|
|
|
|
|
2014-10-25 01:42:17 +08:00
|
|
|
if (D->hasAttr<VectorCallAttr>())
|
|
|
|
return CC_X86VectorCall;
|
|
|
|
|
2010-09-03 09:29:35 +08:00
|
|
|
if (D->hasAttr<PascalAttr>())
|
|
|
|
return CC_X86Pascal;
|
|
|
|
|
2011-04-15 04:06:49 +08:00
|
|
|
if (PcsAttr *PCS = D->getAttr<PcsAttr>())
|
|
|
|
return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
|
|
|
|
|
2012-12-25 16:53:55 +08:00
|
|
|
if (D->hasAttr<IntelOclBiccAttr>())
|
|
|
|
return CC_IntelOclBicc;
|
|
|
|
|
2013-12-19 00:23:37 +08:00
|
|
|
if (D->hasAttr<MSABIAttr>())
|
|
|
|
return IsWindows ? CC_C : CC_X86_64Win64;
|
|
|
|
|
|
|
|
if (D->hasAttr<SysVABIAttr>())
|
|
|
|
return IsWindows ? CC_X86_64SysV : CC_C;
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
return CC_C;
|
2008-09-10 12:01:49 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a call to an
|
|
|
|
/// unknown C++ non-static member function of the given abstract type.
|
2013-08-21 14:25:03 +08:00
|
|
|
/// (Zero value of RD means we don't have any meaningful "this" argument type,
|
|
|
|
/// so fall back to a generic pointer type).
|
2012-02-17 11:33:10 +08:00
|
|
|
/// The member function must be an ordinary function, i.e. not a
|
|
|
|
/// constructor or destructor.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
|
|
|
|
const FunctionProtoType *FTP) {
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2009-10-04 03:43:08 +08:00
|
|
|
// Add the 'this' pointer.
|
2013-08-21 14:25:03 +08:00
|
|
|
if (RD)
|
|
|
|
argTypes.push_back(GetThisType(Context, RD));
|
|
|
|
else
|
|
|
|
argTypes.push_back(Context.VoidPtrTy);
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2014-08-14 07:55:54 +08:00
|
|
|
return ::arrangeLLVMFunctionInfo(
|
|
|
|
*this, true, argTypes,
|
|
|
|
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>());
|
2009-10-04 03:43:08 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a declaration or
|
|
|
|
/// definition of the given C++ non-static member function. The
|
|
|
|
/// member function must be an ordinary function, i.e. not a
|
|
|
|
/// constructor or destructor.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
|
2013-09-09 22:48:42 +08:00
|
|
|
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
|
2010-09-03 09:26:39 +08:00
|
|
|
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
CanQual<FunctionProtoType> prototype = GetFormalType(MD);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
if (MD->isInstance()) {
|
|
|
|
// The abstract case is perfectly fine.
|
2013-10-03 04:35:23 +08:00
|
|
|
const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD);
|
2013-08-21 14:25:03 +08:00
|
|
|
return arrangeCXXMethodType(ThisType, prototype.getTypePtr());
|
2012-02-17 11:33:10 +08:00
|
|
|
}
|
|
|
|
|
2012-07-07 14:41:13 +08:00
|
|
|
return arrangeFreeFunctionType(prototype);
|
2009-04-04 06:48:58 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
const CGFunctionInfo &
|
2014-09-09 00:01:27 +08:00
|
|
|
CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
|
|
|
|
StructorType Type) {
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2014-09-09 00:01:27 +08:00
|
|
|
argTypes.push_back(GetThisType(Context, MD->getParent()));
|
2013-07-01 04:40:16 +08:00
|
|
|
|
2014-09-09 00:01:27 +08:00
|
|
|
GlobalDecl GD;
|
|
|
|
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
|
|
|
|
GD = GlobalDecl(CD, toCXXCtorType(Type));
|
|
|
|
} else {
|
|
|
|
auto *DD = dyn_cast<CXXDestructorDecl>(MD);
|
|
|
|
GD = GlobalDecl(DD, toCXXDtorType(Type));
|
|
|
|
}
|
2009-11-25 11:15:49 +08:00
|
|
|
|
2014-09-09 00:01:27 +08:00
|
|
|
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
|
2010-08-31 15:33:07 +08:00
|
|
|
|
|
|
|
// Add the formal parameters.
|
2015-02-18 00:48:30 +08:00
|
|
|
argTypes.append(FTP->param_type_begin(), FTP->param_type_end());
|
2010-08-31 15:33:07 +08:00
|
|
|
|
2014-09-09 00:01:27 +08:00
|
|
|
TheCXXABI.buildStructorSignature(MD, Type, argTypes);
|
2013-12-18 03:46:40 +08:00
|
|
|
|
|
|
|
RequiredArgs required =
|
2014-09-09 00:01:27 +08:00
|
|
|
(MD->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All);
|
2013-12-18 03:46:40 +08:00
|
|
|
|
2012-07-07 14:41:13 +08:00
|
|
|
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
|
2014-11-01 04:09:12 +08:00
|
|
|
CanQualType resultType = TheCXXABI.HasThisReturn(GD)
|
|
|
|
? argTypes.front()
|
|
|
|
: TheCXXABI.hasMostDerivedReturn(GD)
|
|
|
|
? CGM.getContext().VoidPtrTy
|
|
|
|
: Context.VoidTy;
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true,
|
|
|
|
/*chainCall=*/false, argTypes, extInfo,
|
|
|
|
required);
|
2009-11-25 11:15:49 +08:00
|
|
|
}
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
/// Arrange a call to a C++ method, passing the given arguments.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
|
|
|
|
const CXXConstructorDecl *D,
|
|
|
|
CXXCtorType CtorKind,
|
|
|
|
unsigned ExtraArgs) {
|
|
|
|
// FIXME: Kill copy.
|
|
|
|
SmallVector<CanQualType, 16> ArgTypes;
|
2014-08-14 04:06:24 +08:00
|
|
|
for (const auto &Arg : args)
|
|
|
|
ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
|
2014-02-01 08:04:45 +08:00
|
|
|
|
|
|
|
CanQual<FunctionProtoType> FPT = GetFormalType(D);
|
|
|
|
RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs);
|
|
|
|
GlobalDecl GD(D, CtorKind);
|
2014-11-01 04:09:12 +08:00
|
|
|
CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
|
|
|
|
? ArgTypes.front()
|
|
|
|
: TheCXXABI.hasMostDerivedReturn(GD)
|
|
|
|
? CGM.getContext().VoidPtrTy
|
|
|
|
: Context.VoidTy;
|
2014-02-01 08:04:45 +08:00
|
|
|
|
|
|
|
FunctionType::ExtInfo Info = FPT->getExtInfo();
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
|
|
|
|
/*chainCall=*/false, ArgTypes, Info,
|
|
|
|
Required);
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for the declaration or
|
|
|
|
/// definition of the given function.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
|
2009-05-13 04:27:19 +08:00
|
|
|
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
|
2009-04-04 06:48:58 +08:00
|
|
|
if (MD->isInstance())
|
2012-02-17 11:33:10 +08:00
|
|
|
return arrangeCXXMethodDeclaration(MD);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
|
2012-02-17 11:33:10 +08:00
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
assert(isa<FunctionType>(FTy));
|
2012-02-17 11:33:10 +08:00
|
|
|
|
|
|
|
// When declaring a function without a prototype, always use a
|
|
|
|
// non-variadic type.
|
|
|
|
if (isa<FunctionNoProtoType>(FTy)) {
|
|
|
|
CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
noProto->getReturnType(), /*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All);
|
2012-02-17 11:33:10 +08:00
|
|
|
}
|
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
assert(isa<FunctionProtoType>(FTy));
|
2012-07-07 14:41:13 +08:00
|
|
|
return arrangeFreeFunctionType(FTy.getAs<FunctionProtoType>());
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for the declaration or
|
|
|
|
/// definition of an Objective-C method.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD) {
|
|
|
|
// It happens that this is the same as a call with no optional
|
|
|
|
// arguments, except also using the formal 'self' type.
|
|
|
|
return arrangeObjCMessageSendSignature(MD, MD->getSelfDecl()->getType());
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Arrange the argument and result information for the function type
|
|
|
|
/// through which to perform a send to the given Objective-C method,
|
|
|
|
/// using the given receiver type. The receiver type is not always
|
|
|
|
/// the 'self' type of the method or even an Objective-C pointer type.
|
|
|
|
/// This is *not* the right method for actually performing such a
|
|
|
|
/// message send, due to the possibility of optional arguments.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
|
|
|
|
QualType receiverType) {
|
|
|
|
SmallVector<CanQualType, 16> argTys;
|
|
|
|
argTys.push_back(Context.getCanonicalParamType(receiverType));
|
|
|
|
argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy?
|
2014-03-08 01:50:17 +08:00
|
|
|
for (const auto *I : MD->params()) {
|
|
|
|
argTys.push_back(Context.getCanonicalParamType(I->getType()));
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
FunctionType::ExtInfo einfo;
|
2013-12-19 00:23:37 +08:00
|
|
|
bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
|
|
|
|
einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
|
2011-06-16 07:02:42 +08:00
|
|
|
|
2012-03-11 15:00:24 +08:00
|
|
|
if (getContext().getLangOpts().ObjCAutoRefCount &&
|
2011-06-16 07:02:42 +08:00
|
|
|
MD->hasAttr<NSReturnsRetainedAttr>())
|
|
|
|
einfo = einfo.withProducesResult(true);
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
RequiredArgs required =
|
|
|
|
(MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
|
|
|
|
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
GetReturnType(MD->getReturnType()), /*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, argTys, einfo, required);
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
|
2010-02-06 10:44:09 +08:00
|
|
|
// FIXME: Do we need to handle ObjCMethodDecl?
|
|
|
|
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-02-06 10:44:09 +08:00
|
|
|
if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD))
|
2014-09-09 00:01:27 +08:00
|
|
|
return arrangeCXXStructorDeclaration(CD, getFromCtorType(GD.getCtorType()));
|
2010-02-06 10:44:09 +08:00
|
|
|
|
|
|
|
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(FD))
|
2014-09-09 00:01:27 +08:00
|
|
|
return arrangeCXXStructorDeclaration(DD, getFromDtorType(GD.getDtorType()));
|
2012-02-17 11:33:10 +08:00
|
|
|
|
|
|
|
return arrangeFunctionDeclaration(FD);
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-08-30 05:43:29 +08:00
|
|
|
/// Arrange a thunk that takes 'this' as the first parameter followed by
|
|
|
|
/// varargs. Return a void pointer, regardless of the actual return type.
|
|
|
|
/// The body of the thunk will end in a musttail call to a function of the
|
|
|
|
/// correct type, and the caller will bitcast the function to the correct
|
|
|
|
/// prototype.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) {
|
|
|
|
assert(MD->isVirtual() && "only virtual memptrs have thunks");
|
|
|
|
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
|
|
|
|
CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, ArgTys,
|
2014-08-30 05:43:29 +08:00
|
|
|
FTP->getExtInfo(), RequiredArgs(1));
|
|
|
|
}
|
|
|
|
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
/// Arrange a call as unto a free function, except possibly with an
|
|
|
|
/// additional number of formal parameters considered required.
|
|
|
|
static const CGFunctionInfo &
|
|
|
|
arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
|
2013-10-11 04:57:00 +08:00
|
|
|
CodeGenModule &CGM,
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
const CallArgList &args,
|
|
|
|
const FunctionType *fnType,
|
2014-12-13 07:41:25 +08:00
|
|
|
unsigned numExtraRequiredArgs,
|
|
|
|
bool chainCall) {
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
assert(args.size() >= numExtraRequiredArgs);
|
|
|
|
|
|
|
|
// In most cases, there are no optional arguments.
|
|
|
|
RequiredArgs required = RequiredArgs::All;
|
|
|
|
|
|
|
|
// If we have a variadic prototype, the required arguments are the
|
|
|
|
// extra prefix plus the arguments in the prototype.
|
|
|
|
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) {
|
|
|
|
if (proto->isVariadic())
|
2014-01-21 04:26:09 +08:00
|
|
|
required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs);
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
|
|
|
|
// If we don't have a prototype at all, but we're supposed to
|
|
|
|
// explicitly use the variadic convention for unprototyped calls,
|
|
|
|
// treat all of the arguments as required but preserve the nominal
|
|
|
|
// possibility of variadics.
|
2013-10-11 04:57:00 +08:00
|
|
|
} else if (CGM.getTargetCodeGenInfo()
|
|
|
|
.isNoProtoCallVariadic(args,
|
|
|
|
cast<FunctionNoProtoType>(fnType))) {
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
required = RequiredArgs(args.size());
|
|
|
|
}
|
|
|
|
|
2014-12-13 07:41:25 +08:00
|
|
|
// FIXME: Kill copy.
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
|
|
|
for (const auto &arg : args)
|
|
|
|
argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty));
|
|
|
|
return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()),
|
|
|
|
/*instanceMethod=*/false, chainCall,
|
|
|
|
argTypes, fnType->getExtInfo(), required);
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Figure out the rules for calling a function with the given formal
|
|
|
|
/// type using the given arguments. The arguments are necessary
|
|
|
|
/// because the function might be unprototyped, in which case it's
|
|
|
|
/// target-dependent in crazy ways.
|
|
|
|
const CGFunctionInfo &
|
2012-07-07 14:41:13 +08:00
|
|
|
CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args,
|
2014-12-13 07:41:25 +08:00
|
|
|
const FunctionType *fnType,
|
|
|
|
bool chainCall) {
|
|
|
|
return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType,
|
|
|
|
chainCall ? 1 : 0, chainCall);
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
}
|
2012-02-17 11:33:10 +08:00
|
|
|
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
/// A block function call is essentially a free-function call with an
|
|
|
|
/// extra implicit argument.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args,
|
|
|
|
const FunctionType *fnType) {
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType, 1,
|
|
|
|
/*chainCall=*/false);
|
2010-02-06 10:44:09 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
const CGFunctionInfo &
|
2012-07-07 14:41:13 +08:00
|
|
|
CodeGenTypes::arrangeFreeFunctionCall(QualType resultType,
|
|
|
|
const CallArgList &args,
|
|
|
|
FunctionType::ExtInfo info,
|
|
|
|
RequiredArgs required) {
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy.
|
2012-02-17 11:33:10 +08:00
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2014-08-14 04:06:24 +08:00
|
|
|
for (const auto &Arg : args)
|
|
|
|
argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
GetReturnType(resultType), /*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, argTypes, info, required);
|
2012-07-07 14:41:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Arrange a call to a C++ method, passing the given arguments.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
|
|
|
|
const FunctionProtoType *FPT,
|
|
|
|
RequiredArgs required) {
|
|
|
|
// FIXME: Kill copy.
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2014-08-14 04:06:24 +08:00
|
|
|
for (const auto &Arg : args)
|
|
|
|
argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
|
2012-07-07 14:41:13 +08:00
|
|
|
|
|
|
|
FunctionType::ExtInfo info = FPT->getExtInfo();
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true,
|
|
|
|
/*chainCall=*/false, argTypes, info, required);
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
|
|
|
|
2014-02-01 06:54:50 +08:00
|
|
|
const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration(
|
|
|
|
QualType resultType, const FunctionArgList &args,
|
|
|
|
const FunctionType::ExtInfo &info, bool isVariadic) {
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy.
|
2012-02-17 11:33:10 +08:00
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2014-08-14 04:06:24 +08:00
|
|
|
for (auto Arg : args)
|
|
|
|
argTypes.push_back(Context.getCanonicalParamType(Arg->getType()));
|
2012-02-17 11:33:10 +08:00
|
|
|
|
|
|
|
RequiredArgs required =
|
|
|
|
(isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All);
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
GetReturnType(resultType), /*instanceMethod=*/false,
|
|
|
|
/*chainCall=*/false, argTypes, info, required);
|
2009-02-03 07:23:47 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
|
2014-12-13 07:41:25 +08:00
|
|
|
return arrangeLLVMFunctionInfo(
|
|
|
|
getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false,
|
|
|
|
None, FunctionType::ExtInfo(), RequiredArgs::All);
|
2011-03-09 12:27:21 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for an abstract value
|
|
|
|
/// of a given function type. This is the method which all of the
|
|
|
|
/// above functions ultimately defer to.
|
|
|
|
const CGFunctionInfo &
|
2012-07-07 14:41:13 +08:00
|
|
|
CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
|
2014-12-13 07:41:25 +08:00
|
|
|
bool instanceMethod,
|
|
|
|
bool chainCall,
|
2012-07-07 14:41:13 +08:00
|
|
|
ArrayRef<CanQualType> argTypes,
|
|
|
|
FunctionType::ExtInfo info,
|
|
|
|
RequiredArgs required) {
|
2014-11-25 11:49:50 +08:00
|
|
|
assert(std::all_of(argTypes.begin(), argTypes.end(),
|
|
|
|
std::mem_fun_ref(&CanQualType::isCanonicalAsParam)));
|
2010-02-26 08:48:12 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
|
2010-02-06 05:31:56 +08:00
|
|
|
|
2009-02-03 08:07:12 +08:00
|
|
|
// Lookup or create unique function info.
|
|
|
|
llvm::FoldingSetNodeID ID;
|
2014-12-13 07:41:25 +08:00
|
|
|
CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required,
|
|
|
|
resultType, argTypes);
|
2009-02-03 08:07:12 +08:00
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
void *insertPos = nullptr;
|
2012-02-17 11:33:10 +08:00
|
|
|
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
|
2009-02-03 08:07:12 +08:00
|
|
|
if (FI)
|
|
|
|
return *FI;
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
// Construct the function info. We co-allocate the ArgInfos.
|
2014-12-13 07:41:25 +08:00
|
|
|
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
|
|
|
|
resultType, argTypes, required);
|
2012-02-17 11:33:10 +08:00
|
|
|
FunctionInfos.InsertNode(FI, insertPos);
|
2009-02-03 07:23:47 +08:00
|
|
|
|
2014-11-19 15:49:47 +08:00
|
|
|
bool inserted = FunctionsBeingProcessed.insert(FI).second;
|
|
|
|
(void)inserted;
|
2012-02-17 11:33:10 +08:00
|
|
|
assert(inserted && "Recursively being processed?");
|
2011-07-15 13:16:14 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
// Compute ABI information.
|
2010-07-29 10:31:05 +08:00
|
|
|
getABIInfo().computeInfo(*FI);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// Loop over all of the computed argument and return value info. If any of
|
|
|
|
// them are direct or extend without a specified coerce type, specify the
|
|
|
|
// default now.
|
2012-02-17 11:33:10 +08:00
|
|
|
ABIArgInfo &retInfo = FI->getReturnInfo();
|
2014-05-21 13:09:00 +08:00
|
|
|
if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr)
|
2012-02-17 11:33:10 +08:00
|
|
|
retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-03-18 02:10:01 +08:00
|
|
|
for (auto &I : FI->arguments())
|
2014-05-21 13:09:00 +08:00
|
|
|
if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr)
|
2014-03-18 02:10:01 +08:00
|
|
|
I.info.setCoerceToType(ConvertType(I.type));
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
|
|
|
|
assert(erased && "Not in set?");
|
2011-07-15 14:41:05 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
return *FI;
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
2008-09-10 07:27:19 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
|
2014-12-13 07:41:25 +08:00
|
|
|
bool instanceMethod,
|
|
|
|
bool chainCall,
|
2012-02-17 11:33:10 +08:00
|
|
|
const FunctionType::ExtInfo &info,
|
|
|
|
CanQualType resultType,
|
|
|
|
ArrayRef<CanQualType> argTypes,
|
|
|
|
RequiredArgs required) {
|
|
|
|
void *buffer = operator new(sizeof(CGFunctionInfo) +
|
|
|
|
sizeof(ArgInfo) * (argTypes.size() + 1));
|
|
|
|
CGFunctionInfo *FI = new(buffer) CGFunctionInfo();
|
|
|
|
FI->CallingConvention = llvmCC;
|
|
|
|
FI->EffectiveCallingConvention = llvmCC;
|
|
|
|
FI->ASTCallingConvention = info.getCC();
|
2014-12-13 07:41:25 +08:00
|
|
|
FI->InstanceMethod = instanceMethod;
|
|
|
|
FI->ChainCall = chainCall;
|
2012-02-17 11:33:10 +08:00
|
|
|
FI->NoReturn = info.getNoReturn();
|
|
|
|
FI->ReturnsRetained = info.getProducesResult();
|
|
|
|
FI->Required = required;
|
|
|
|
FI->HasRegParm = info.getHasRegParm();
|
|
|
|
FI->RegParm = info.getRegParm();
|
2014-05-21 13:09:00 +08:00
|
|
|
FI->ArgStruct = nullptr;
|
2012-02-17 11:33:10 +08:00
|
|
|
FI->NumArgs = argTypes.size();
|
|
|
|
FI->getArgsBuffer()[0].type = resultType;
|
|
|
|
for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
|
|
|
|
FI->getArgsBuffer()[i + 1].type = argTypes[i];
|
|
|
|
return FI;
|
2009-02-03 13:31:23 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/***/
|
|
|
|
|
2014-09-30 02:41:28 +08:00
|
|
|
namespace {
|
|
|
|
// ABIArgInfo::Expand implementation.
|
|
|
|
|
|
|
|
// Specifies the way QualType passed as ABIArgInfo::Expand is expanded.
|
|
|
|
struct TypeExpansion {
|
|
|
|
enum TypeExpansionKind {
|
|
|
|
// Elements of constant arrays are expanded recursively.
|
|
|
|
TEK_ConstantArray,
|
|
|
|
// Record fields are expanded recursively (but if record is a union, only
|
|
|
|
// the field with the largest size is expanded).
|
|
|
|
TEK_Record,
|
|
|
|
// For complex types, real and imaginary parts are expanded recursively.
|
|
|
|
TEK_Complex,
|
|
|
|
// All other types are not expandable.
|
|
|
|
TEK_None
|
|
|
|
};
|
|
|
|
|
|
|
|
const TypeExpansionKind Kind;
|
|
|
|
|
|
|
|
TypeExpansion(TypeExpansionKind K) : Kind(K) {}
|
|
|
|
virtual ~TypeExpansion() {}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ConstantArrayExpansion : TypeExpansion {
|
|
|
|
QualType EltTy;
|
|
|
|
uint64_t NumElts;
|
|
|
|
|
|
|
|
ConstantArrayExpansion(QualType EltTy, uint64_t NumElts)
|
|
|
|
: TypeExpansion(TEK_ConstantArray), EltTy(EltTy), NumElts(NumElts) {}
|
|
|
|
static bool classof(const TypeExpansion *TE) {
|
|
|
|
return TE->Kind == TEK_ConstantArray;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct RecordExpansion : TypeExpansion {
|
2014-11-01 01:10:41 +08:00
|
|
|
SmallVector<const CXXBaseSpecifier *, 1> Bases;
|
|
|
|
|
2014-09-30 02:41:28 +08:00
|
|
|
SmallVector<const FieldDecl *, 1> Fields;
|
|
|
|
|
2014-11-01 01:10:41 +08:00
|
|
|
RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases,
|
|
|
|
SmallVector<const FieldDecl *, 1> &&Fields)
|
|
|
|
: TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {}
|
2014-09-30 02:41:28 +08:00
|
|
|
static bool classof(const TypeExpansion *TE) {
|
|
|
|
return TE->Kind == TEK_Record;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct ComplexExpansion : TypeExpansion {
|
|
|
|
QualType EltTy;
|
|
|
|
|
|
|
|
ComplexExpansion(QualType EltTy) : TypeExpansion(TEK_Complex), EltTy(EltTy) {}
|
|
|
|
static bool classof(const TypeExpansion *TE) {
|
|
|
|
return TE->Kind == TEK_Complex;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct NoExpansion : TypeExpansion {
|
|
|
|
NoExpansion() : TypeExpansion(TEK_None) {}
|
|
|
|
static bool classof(const TypeExpansion *TE) {
|
|
|
|
return TE->Kind == TEK_None;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
static std::unique_ptr<TypeExpansion>
|
|
|
|
getTypeExpansion(QualType Ty, const ASTContext &Context) {
|
|
|
|
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
|
|
|
|
return llvm::make_unique<ConstantArrayExpansion>(
|
|
|
|
AT->getElementType(), AT->getSize().getZExtValue());
|
|
|
|
}
|
|
|
|
if (const RecordType *RT = Ty->getAs<RecordType>()) {
|
2014-11-01 01:10:41 +08:00
|
|
|
SmallVector<const CXXBaseSpecifier *, 1> Bases;
|
2014-09-30 02:41:28 +08:00
|
|
|
SmallVector<const FieldDecl *, 1> Fields;
|
2011-08-03 13:58:22 +08:00
|
|
|
const RecordDecl *RD = RT->getDecl();
|
|
|
|
assert(!RD->hasFlexibleArrayMember() &&
|
|
|
|
"Cannot expand structure with flexible array.");
|
2012-04-13 19:22:00 +08:00
|
|
|
if (RD->isUnion()) {
|
|
|
|
// Unions can be here only in degenerative cases - all the fields are same
|
|
|
|
// after flattening. Thus we have to use the "largest" field.
|
2014-05-21 13:09:00 +08:00
|
|
|
const FieldDecl *LargestFD = nullptr;
|
2012-04-13 19:22:00 +08:00
|
|
|
CharUnits UnionSize = CharUnits::Zero();
|
|
|
|
|
2014-03-09 04:12:42 +08:00
|
|
|
for (const auto *FD : RD->fields()) {
|
2014-11-01 06:00:51 +08:00
|
|
|
// Skip zero length bitfields.
|
|
|
|
if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
|
|
|
|
continue;
|
2012-04-13 19:22:00 +08:00
|
|
|
assert(!FD->isBitField() &&
|
|
|
|
"Cannot expand structure with bit-field members.");
|
2014-09-30 02:41:28 +08:00
|
|
|
CharUnits FieldSize = Context.getTypeSizeInChars(FD->getType());
|
2012-04-13 19:22:00 +08:00
|
|
|
if (UnionSize < FieldSize) {
|
|
|
|
UnionSize = FieldSize;
|
|
|
|
LargestFD = FD;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (LargestFD)
|
2014-09-30 02:41:28 +08:00
|
|
|
Fields.push_back(LargestFD);
|
2012-04-13 19:22:00 +08:00
|
|
|
} else {
|
2014-11-01 01:10:41 +08:00
|
|
|
if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
|
|
|
|
assert(!CXXRD->isDynamicClass() &&
|
|
|
|
"cannot expand vtable pointers in dynamic classes");
|
|
|
|
for (const CXXBaseSpecifier &BS : CXXRD->bases())
|
|
|
|
Bases.push_back(&BS);
|
|
|
|
}
|
|
|
|
|
2014-09-30 02:41:28 +08:00
|
|
|
for (const auto *FD : RD->fields()) {
|
2014-11-01 06:00:51 +08:00
|
|
|
// Skip zero length bitfields.
|
|
|
|
if (FD->isBitField() && FD->getBitWidthValue(Context) == 0)
|
|
|
|
continue;
|
2014-09-30 02:41:28 +08:00
|
|
|
assert(!FD->isBitField() &&
|
2012-04-13 19:22:00 +08:00
|
|
|
"Cannot expand structure with bit-field members.");
|
2014-09-30 02:41:28 +08:00
|
|
|
Fields.push_back(FD);
|
2012-04-13 19:22:00 +08:00
|
|
|
}
|
2011-08-03 13:58:22 +08:00
|
|
|
}
|
2014-11-01 01:10:41 +08:00
|
|
|
return llvm::make_unique<RecordExpansion>(std::move(Bases),
|
|
|
|
std::move(Fields));
|
2014-09-30 02:41:28 +08:00
|
|
|
}
|
|
|
|
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
|
|
|
|
return llvm::make_unique<ComplexExpansion>(CT->getElementType());
|
|
|
|
}
|
|
|
|
return llvm::make_unique<NoExpansion>();
|
|
|
|
}
|
|
|
|
|
2014-09-30 04:30:22 +08:00
|
|
|
static int getExpansionSize(QualType Ty, const ASTContext &Context) {
|
|
|
|
auto Exp = getTypeExpansion(Ty, Context);
|
|
|
|
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
|
|
|
|
return CAExp->NumElts * getExpansionSize(CAExp->EltTy, Context);
|
|
|
|
}
|
|
|
|
if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
|
|
|
|
int Res = 0;
|
2014-11-01 01:10:41 +08:00
|
|
|
for (auto BS : RExp->Bases)
|
|
|
|
Res += getExpansionSize(BS->getType(), Context);
|
2014-09-30 04:30:22 +08:00
|
|
|
for (auto FD : RExp->Fields)
|
|
|
|
Res += getExpansionSize(FD->getType(), Context);
|
|
|
|
return Res;
|
|
|
|
}
|
|
|
|
if (isa<ComplexExpansion>(Exp.get()))
|
|
|
|
return 2;
|
|
|
|
assert(isa<NoExpansion>(Exp.get()));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
void
|
|
|
|
CodeGenTypes::getExpandedTypes(QualType Ty,
|
|
|
|
SmallVectorImpl<llvm::Type *>::iterator &TI) {
|
|
|
|
auto Exp = getTypeExpansion(Ty, Context);
|
2014-09-30 02:41:28 +08:00
|
|
|
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
|
|
|
|
for (int i = 0, n = CAExp->NumElts; i < n; i++) {
|
2014-09-30 06:08:00 +08:00
|
|
|
getExpandedTypes(CAExp->EltTy, TI);
|
2014-09-30 02:41:28 +08:00
|
|
|
}
|
|
|
|
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
|
2014-11-01 01:10:41 +08:00
|
|
|
for (auto BS : RExp->Bases)
|
|
|
|
getExpandedTypes(BS->getType(), TI);
|
|
|
|
for (auto FD : RExp->Fields)
|
2014-09-30 06:08:00 +08:00
|
|
|
getExpandedTypes(FD->getType(), TI);
|
2014-09-30 02:41:28 +08:00
|
|
|
} else if (auto CExp = dyn_cast<ComplexExpansion>(Exp.get())) {
|
|
|
|
llvm::Type *EltTy = ConvertType(CExp->EltTy);
|
2014-09-30 06:08:00 +08:00
|
|
|
*TI++ = EltTy;
|
|
|
|
*TI++ = EltTy;
|
2014-09-30 02:41:28 +08:00
|
|
|
} else {
|
|
|
|
assert(isa<NoExpansion>(Exp.get()));
|
2014-09-30 06:08:00 +08:00
|
|
|
*TI++ = ConvertType(Ty);
|
2014-09-30 02:41:28 +08:00
|
|
|
}
|
2008-09-17 08:51:38 +08:00
|
|
|
}
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
void CodeGenFunction::ExpandTypeFromArgs(
|
|
|
|
QualType Ty, LValue LV, SmallVectorImpl<llvm::Argument *>::iterator &AI) {
|
2009-09-09 23:08:12 +08:00
|
|
|
assert(LV.isSimple() &&
|
|
|
|
"Unexpected non-simple lvalue during struct expansion.");
|
2011-08-03 13:58:22 +08:00
|
|
|
|
2014-09-30 02:41:28 +08:00
|
|
|
auto Exp = getTypeExpansion(Ty, getContext());
|
|
|
|
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
|
|
|
|
for (int i = 0, n = CAExp->NumElts; i < n; i++) {
|
|
|
|
llvm::Value *EltAddr = Builder.CreateConstGEP2_32(LV.getAddress(), 0, i);
|
|
|
|
LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy);
|
|
|
|
ExpandTypeFromArgs(CAExp->EltTy, LV, AI);
|
2011-08-03 13:58:22 +08:00
|
|
|
}
|
2014-09-30 02:41:28 +08:00
|
|
|
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
|
2014-11-01 01:10:41 +08:00
|
|
|
llvm::Value *This = LV.getAddress();
|
|
|
|
for (const CXXBaseSpecifier *BS : RExp->Bases) {
|
|
|
|
// Perform a single step derived-to-base conversion.
|
|
|
|
llvm::Value *Base =
|
|
|
|
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
|
|
|
|
/*NullCheckValue=*/false, SourceLocation());
|
|
|
|
LValue SubLV = MakeAddrLValue(Base, BS->getType());
|
|
|
|
|
|
|
|
// Recurse onto bases.
|
|
|
|
ExpandTypeFromArgs(BS->getType(), SubLV, AI);
|
|
|
|
}
|
2014-09-30 02:41:28 +08:00
|
|
|
for (auto FD : RExp->Fields) {
|
|
|
|
// FIXME: What are the right qualifiers here?
|
|
|
|
LValue SubLV = EmitLValueForField(LV, FD);
|
|
|
|
ExpandTypeFromArgs(FD->getType(), SubLV, AI);
|
2008-09-17 08:51:38 +08:00
|
|
|
}
|
2014-09-30 02:41:28 +08:00
|
|
|
} else if (auto CExp = dyn_cast<ComplexExpansion>(Exp.get())) {
|
2012-04-16 11:54:45 +08:00
|
|
|
llvm::Value *RealAddr = Builder.CreateStructGEP(LV.getAddress(), 0, "real");
|
2014-09-30 02:41:28 +08:00
|
|
|
EmitStoreThroughLValue(RValue::get(*AI++),
|
|
|
|
MakeAddrLValue(RealAddr, CExp->EltTy));
|
2012-04-16 11:54:45 +08:00
|
|
|
llvm::Value *ImagAddr = Builder.CreateStructGEP(LV.getAddress(), 1, "imag");
|
2014-09-30 02:41:28 +08:00
|
|
|
EmitStoreThroughLValue(RValue::get(*AI++),
|
|
|
|
MakeAddrLValue(ImagAddr, CExp->EltTy));
|
|
|
|
} else {
|
|
|
|
assert(isa<NoExpansion>(Exp.get()));
|
|
|
|
EmitStoreThroughLValue(RValue::get(*AI++), LV);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::ExpandTypeToArgs(
|
|
|
|
QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
|
|
|
|
SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) {
|
|
|
|
auto Exp = getTypeExpansion(Ty, getContext());
|
|
|
|
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
|
|
|
|
llvm::Value *Addr = RV.getAggregateAddr();
|
|
|
|
for (int i = 0, n = CAExp->NumElts; i < n; i++) {
|
|
|
|
llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, i);
|
|
|
|
RValue EltRV =
|
|
|
|
convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
|
|
|
|
ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos);
|
|
|
|
}
|
|
|
|
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
|
2014-11-01 01:10:41 +08:00
|
|
|
llvm::Value *This = RV.getAggregateAddr();
|
|
|
|
for (const CXXBaseSpecifier *BS : RExp->Bases) {
|
|
|
|
// Perform a single step derived-to-base conversion.
|
|
|
|
llvm::Value *Base =
|
|
|
|
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
|
|
|
|
/*NullCheckValue=*/false, SourceLocation());
|
|
|
|
RValue BaseRV = RValue::getAggregate(Base);
|
|
|
|
|
|
|
|
// Recurse onto bases.
|
|
|
|
ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
|
|
|
|
IRCallArgPos);
|
|
|
|
}
|
|
|
|
|
|
|
|
LValue LV = MakeAddrLValue(This, Ty);
|
2014-09-30 02:41:28 +08:00
|
|
|
for (auto FD : RExp->Fields) {
|
|
|
|
RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
|
|
|
|
ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs,
|
|
|
|
IRCallArgPos);
|
|
|
|
}
|
|
|
|
} else if (isa<ComplexExpansion>(Exp.get())) {
|
|
|
|
ComplexPairTy CV = RV.getComplexVal();
|
|
|
|
IRCallArgs[IRCallArgPos++] = CV.first;
|
|
|
|
IRCallArgs[IRCallArgPos++] = CV.second;
|
|
|
|
} else {
|
|
|
|
assert(isa<NoExpansion>(Exp.get()));
|
|
|
|
assert(RV.isScalar() &&
|
|
|
|
"Unexpected non-scalar rvalue during struct expansion.");
|
|
|
|
|
|
|
|
// Insert a bitcast as needed.
|
|
|
|
llvm::Value *V = RV.getScalarVal();
|
|
|
|
if (IRCallArgPos < IRFuncTy->getNumParams() &&
|
|
|
|
V->getType() != IRFuncTy->getParamType(IRCallArgPos))
|
|
|
|
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(IRCallArgPos));
|
|
|
|
|
|
|
|
IRCallArgs[IRCallArgPos++] = V;
|
2008-09-17 08:51:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
/// EnterStructPointerForCoercedAccess - Given a struct pointer that we are
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
/// accessing some number of bytes out of it, try to gep into the struct to get
|
|
|
|
/// at its inner goodness. Dive as deep as possible without entering an element
|
|
|
|
/// with an in-memory size smaller than DstSize.
|
|
|
|
static llvm::Value *
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
EnterStructPointerForCoercedAccess(llvm::Value *SrcPtr,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::StructType *SrcSTy,
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
uint64_t DstSize, CodeGenFunction &CGF) {
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// We can't dive into a zero-element struct.
|
|
|
|
if (SrcSTy->getNumElements() == 0) return SrcPtr;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *FirstElt = SrcSTy->getElementType(0);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// If the first elt is at least as large as what we're looking for, or if the
|
2014-08-29 18:17:52 +08:00
|
|
|
// first element is the same size as the whole struct, we can enter it. The
|
|
|
|
// comparison must be made on the store size and not the alloca size. Using
|
|
|
|
// the alloca size may overstate the size of the load.
|
2010-10-19 14:39:39 +08:00
|
|
|
uint64_t FirstEltSize =
|
2014-08-29 18:17:52 +08:00
|
|
|
CGF.CGM.getDataLayout().getTypeStoreSize(FirstElt);
|
2010-10-19 14:39:39 +08:00
|
|
|
if (FirstEltSize < DstSize &&
|
2014-08-29 18:17:52 +08:00
|
|
|
FirstEltSize < CGF.CGM.getDataLayout().getTypeStoreSize(SrcSTy))
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
return SrcPtr;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// GEP into the first element.
|
|
|
|
SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcPtr, 0, 0, "coerce.dive");
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// If the first element is a struct, recurse.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy =
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy))
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
return EnterStructPointerForCoercedAccess(SrcPtr, SrcSTy, DstSize, CGF);
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
|
|
|
|
return SrcPtr;
|
|
|
|
}
|
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
/// CoerceIntOrPtrToIntOrPtr - Convert a value Val to the specific Ty where both
|
|
|
|
/// are either integers or pointers. This does a truncation of the value if it
|
|
|
|
/// is too large or a zero extension if it is too small.
|
2013-06-05 11:00:13 +08:00
|
|
|
///
|
|
|
|
/// This behaves as if the value were coerced through memory, so on big-endian
|
|
|
|
/// targets the high bits are preserved in a truncation, while little-endian
|
|
|
|
/// targets preserve the low bits.
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *Ty,
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
CodeGenFunction &CGF) {
|
|
|
|
if (Val->getType() == Ty)
|
|
|
|
return Val;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(Val->getType())) {
|
|
|
|
// If this is Pointer->Pointer avoid conversion to and from int.
|
|
|
|
if (isa<llvm::PointerType>(Ty))
|
|
|
|
return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// Convert the pointer to an integer so we can play with its width.
|
2010-06-27 15:15:29 +08:00
|
|
|
Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *DestIntTy = Ty;
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(DestIntTy))
|
2010-06-27 15:15:29 +08:00
|
|
|
DestIntTy = CGF.IntPtrTy;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2013-06-05 11:00:13 +08:00
|
|
|
if (Val->getType() != DestIntTy) {
|
|
|
|
const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
|
|
|
|
if (DL.isBigEndian()) {
|
|
|
|
// Preserve the high bits on big-endian targets.
|
|
|
|
// That is what memory coercion does.
|
2014-05-08 01:41:15 +08:00
|
|
|
uint64_t SrcSize = DL.getTypeSizeInBits(Val->getType());
|
|
|
|
uint64_t DstSize = DL.getTypeSizeInBits(DestIntTy);
|
|
|
|
|
2013-06-05 11:00:13 +08:00
|
|
|
if (SrcSize > DstSize) {
|
|
|
|
Val = CGF.Builder.CreateLShr(Val, SrcSize - DstSize, "coerce.highbits");
|
|
|
|
Val = CGF.Builder.CreateTrunc(Val, DestIntTy, "coerce.val.ii");
|
|
|
|
} else {
|
|
|
|
Val = CGF.Builder.CreateZExt(Val, DestIntTy, "coerce.val.ii");
|
|
|
|
Val = CGF.Builder.CreateShl(Val, DstSize - SrcSize, "coerce.highbits");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Little-endian targets preserve the low bits. No shifts required.
|
|
|
|
Val = CGF.Builder.CreateIntCast(Val, DestIntTy, false, "coerce.val.ii");
|
|
|
|
}
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(Ty))
|
|
|
|
Val = CGF.Builder.CreateIntToPtr(Val, Ty, "coerce.val.ip");
|
|
|
|
return Val;
|
|
|
|
}
|
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
|
|
|
|
|
2009-02-03 03:06:38 +08:00
|
|
|
/// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
|
|
|
|
/// a pointer to an object of type \arg Ty.
|
|
|
|
///
|
|
|
|
/// This safely handles the case when the src type is smaller than the
|
|
|
|
/// destination type; in this situation the values of bits which not
|
|
|
|
/// present in the src are undefined.
|
|
|
|
static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *Ty,
|
2009-02-03 03:06:38 +08:00
|
|
|
CodeGenFunction &CGF) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy =
|
2009-02-03 03:06:38 +08:00
|
|
|
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-06-29 06:51:39 +08:00
|
|
|
// If SrcTy and Ty are the same, just do a load.
|
|
|
|
if (SrcTy == Ty)
|
|
|
|
return CGF.Builder.CreateLoad(SrcPtr);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
SrcPtr = EnterStructPointerForCoercedAccess(SrcPtr, SrcSTy, DstSize, CGF);
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
SrcTy = cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
|
2009-02-03 03:06:38 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// If the source and destination are integer or pointer types, just do an
|
|
|
|
// extension or truncation to the desired type.
|
|
|
|
if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) &&
|
|
|
|
(isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) {
|
|
|
|
llvm::LoadInst *Load = CGF.Builder.CreateLoad(SrcPtr);
|
|
|
|
return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF);
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-02-03 13:59:18 +08:00
|
|
|
// If load is legal, just bitcast the src pointer.
|
2009-05-14 02:54:26 +08:00
|
|
|
if (SrcSize >= DstSize) {
|
2009-05-16 15:57:57 +08:00
|
|
|
// Generally SrcSize is never greater than DstSize, since this means we are
|
|
|
|
// losing bits. However, this can happen in cases where the structure has
|
|
|
|
// additional padding, for example due to a user specified alignment.
|
2009-05-14 02:54:26 +08:00
|
|
|
//
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: Assert that we aren't truncating non-padding bits when have access
|
|
|
|
// to that information.
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Casted =
|
|
|
|
CGF.Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(Ty));
|
2009-02-07 10:46:03 +08:00
|
|
|
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted);
|
|
|
|
// FIXME: Use better alignment / avoid requiring aligned load.
|
|
|
|
Load->setAlignment(1);
|
|
|
|
return Load;
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
// Otherwise do coercion through memory. This is stupid, but
|
|
|
|
// simple.
|
|
|
|
llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
|
2012-11-29 06:08:52 +08:00
|
|
|
llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
|
|
|
|
llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
|
|
|
|
llvm::Value *SrcCasted = CGF.Builder.CreateBitCast(SrcPtr, I8PtrTy);
|
2012-11-29 06:29:41 +08:00
|
|
|
// FIXME: Use better alignment.
|
2012-11-29 06:08:52 +08:00
|
|
|
CGF.Builder.CreateMemCpy(Casted, SrcCasted,
|
|
|
|
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
|
|
|
|
1, false);
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
return CGF.Builder.CreateLoad(Tmp);
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
|
|
|
|
2011-05-18 05:08:01 +08:00
|
|
|
// Function to store a first-class aggregate into memory. We prefer to
|
|
|
|
// store the elements rather than the aggregate to be more friendly to
|
|
|
|
// fast-isel.
|
|
|
|
// FIXME: Do we need to recurse here?
|
|
|
|
static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val,
|
|
|
|
llvm::Value *DestPtr, bool DestIsVolatile,
|
|
|
|
bool LowAlignment) {
|
|
|
|
// Prefer scalar stores to first-class aggregate stores.
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *STy =
|
2011-05-18 05:08:01 +08:00
|
|
|
dyn_cast<llvm::StructType>(Val->getType())) {
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(DestPtr, 0, i);
|
|
|
|
llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
|
|
|
|
llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr,
|
|
|
|
DestIsVolatile);
|
|
|
|
if (LowAlignment)
|
|
|
|
SI->setAlignment(1);
|
|
|
|
}
|
|
|
|
} else {
|
2012-03-17 05:45:12 +08:00
|
|
|
llvm::StoreInst *SI = CGF.Builder.CreateStore(Val, DestPtr, DestIsVolatile);
|
|
|
|
if (LowAlignment)
|
|
|
|
SI->setAlignment(1);
|
2011-05-18 05:08:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-02-03 03:06:38 +08:00
|
|
|
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
|
|
|
|
/// where the source and destination may have different types.
|
|
|
|
///
|
|
|
|
/// This safely handles the case when the src type is larger than the
|
|
|
|
/// destination type; the upper bits of the src will be lost.
|
|
|
|
static void CreateCoercedStore(llvm::Value *Src,
|
|
|
|
llvm::Value *DstPtr,
|
2009-12-25 04:40:36 +08:00
|
|
|
bool DstIsVolatile,
|
2009-02-03 03:06:38 +08:00
|
|
|
CodeGenFunction &CGF) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy = Src->getType();
|
|
|
|
llvm::Type *DstTy =
|
2009-02-03 03:06:38 +08:00
|
|
|
cast<llvm::PointerType>(DstPtr->getType())->getElementType();
|
2010-06-29 06:51:39 +08:00
|
|
|
if (SrcTy == DstTy) {
|
|
|
|
CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
|
|
|
|
return;
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
DstPtr = EnterStructPointerForCoercedAccess(DstPtr, DstSTy, SrcSize, CGF);
|
|
|
|
DstTy = cast<llvm::PointerType>(DstPtr->getType())->getElementType();
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// If the source and destination are integer or pointer types, just do an
|
|
|
|
// extension or truncation to the desired type.
|
|
|
|
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
|
|
|
|
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
|
|
|
|
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
|
|
|
|
CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
|
|
|
|
return;
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
|
2009-02-03 03:06:38 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
// If store is legal, just bitcast the src pointer.
|
2009-06-05 15:58:54 +08:00
|
|
|
if (SrcSize <= DstSize) {
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Casted =
|
|
|
|
CGF.Builder.CreateBitCast(DstPtr, llvm::PointerType::getUnqual(SrcTy));
|
2009-02-07 10:46:03 +08:00
|
|
|
// FIXME: Use better alignment / avoid requiring aligned store.
|
2011-05-18 05:08:01 +08:00
|
|
|
BuildAggStore(CGF, Src, Casted, DstIsVolatile, true);
|
2009-02-03 03:06:38 +08:00
|
|
|
} else {
|
|
|
|
// Otherwise do coercion through memory. This is stupid, but
|
|
|
|
// simple.
|
2009-06-05 15:58:54 +08:00
|
|
|
|
|
|
|
// Generally SrcSize is never greater than DstSize, since this means we are
|
|
|
|
// losing bits. However, this can happen in cases where the structure has
|
|
|
|
// additional padding, for example due to a user specified alignment.
|
|
|
|
//
|
|
|
|
// FIXME: Assert that we aren't truncating non-padding bits when have access
|
|
|
|
// to that information.
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy);
|
|
|
|
CGF.Builder.CreateStore(Src, Tmp);
|
2012-11-29 06:08:52 +08:00
|
|
|
llvm::Type *I8PtrTy = CGF.Builder.getInt8PtrTy();
|
|
|
|
llvm::Value *Casted = CGF.Builder.CreateBitCast(Tmp, I8PtrTy);
|
|
|
|
llvm::Value *DstCasted = CGF.Builder.CreateBitCast(DstPtr, I8PtrTy);
|
2012-11-29 06:29:41 +08:00
|
|
|
// FIXME: Use better alignment.
|
2012-11-29 06:08:52 +08:00
|
|
|
CGF.Builder.CreateMemCpy(DstCasted, Casted,
|
|
|
|
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
|
|
|
|
1, false);
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// Encapsulates information about the way function arguments from
|
|
|
|
/// CGFunctionInfo should be passed to actual LLVM IR function.
|
|
|
|
class ClangToLLVMArgMapping {
|
|
|
|
static const unsigned InvalidIndex = ~0U;
|
|
|
|
unsigned InallocaArgNo;
|
|
|
|
unsigned SRetArgNo;
|
|
|
|
unsigned TotalIRArgs;
|
|
|
|
|
|
|
|
/// Arguments of LLVM IR function corresponding to single Clang argument.
|
|
|
|
struct IRArgs {
|
|
|
|
unsigned PaddingArgIndex;
|
|
|
|
// Argument is expanded to IR arguments at positions
|
|
|
|
// [FirstArgIndex, FirstArgIndex + NumberOfArgs).
|
|
|
|
unsigned FirstArgIndex;
|
|
|
|
unsigned NumberOfArgs;
|
|
|
|
|
|
|
|
IRArgs()
|
|
|
|
: PaddingArgIndex(InvalidIndex), FirstArgIndex(InvalidIndex),
|
|
|
|
NumberOfArgs(0) {}
|
|
|
|
};
|
|
|
|
|
|
|
|
SmallVector<IRArgs, 8> ArgInfo;
|
|
|
|
|
|
|
|
public:
|
|
|
|
ClangToLLVMArgMapping(const ASTContext &Context, const CGFunctionInfo &FI,
|
|
|
|
bool OnlyRequiredArgs = false)
|
|
|
|
: InallocaArgNo(InvalidIndex), SRetArgNo(InvalidIndex), TotalIRArgs(0),
|
|
|
|
ArgInfo(OnlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size()) {
|
|
|
|
construct(Context, FI, OnlyRequiredArgs);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool hasInallocaArg() const { return InallocaArgNo != InvalidIndex; }
|
|
|
|
unsigned getInallocaArgNo() const {
|
|
|
|
assert(hasInallocaArg());
|
|
|
|
return InallocaArgNo;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool hasSRetArg() const { return SRetArgNo != InvalidIndex; }
|
|
|
|
unsigned getSRetArgNo() const {
|
|
|
|
assert(hasSRetArg());
|
|
|
|
return SRetArgNo;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned totalIRArgs() const { return TotalIRArgs; }
|
|
|
|
|
|
|
|
bool hasPaddingArg(unsigned ArgNo) const {
|
|
|
|
assert(ArgNo < ArgInfo.size());
|
|
|
|
return ArgInfo[ArgNo].PaddingArgIndex != InvalidIndex;
|
|
|
|
}
|
|
|
|
unsigned getPaddingArgNo(unsigned ArgNo) const {
|
|
|
|
assert(hasPaddingArg(ArgNo));
|
|
|
|
return ArgInfo[ArgNo].PaddingArgIndex;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns index of first IR argument corresponding to ArgNo, and their
|
|
|
|
/// quantity.
|
|
|
|
std::pair<unsigned, unsigned> getIRArgs(unsigned ArgNo) const {
|
|
|
|
assert(ArgNo < ArgInfo.size());
|
|
|
|
return std::make_pair(ArgInfo[ArgNo].FirstArgIndex,
|
|
|
|
ArgInfo[ArgNo].NumberOfArgs);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
void construct(const ASTContext &Context, const CGFunctionInfo &FI,
|
|
|
|
bool OnlyRequiredArgs);
|
|
|
|
};
|
|
|
|
|
|
|
|
void ClangToLLVMArgMapping::construct(const ASTContext &Context,
|
|
|
|
const CGFunctionInfo &FI,
|
|
|
|
bool OnlyRequiredArgs) {
|
|
|
|
unsigned IRArgNo = 0;
|
|
|
|
bool SwapThisWithSRet = false;
|
|
|
|
const ABIArgInfo &RetAI = FI.getReturnInfo();
|
|
|
|
|
|
|
|
if (RetAI.getKind() == ABIArgInfo::Indirect) {
|
|
|
|
SwapThisWithSRet = RetAI.isSRetAfterThis();
|
|
|
|
SRetArgNo = SwapThisWithSRet ? 1 : IRArgNo++;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned ArgNo = 0;
|
|
|
|
unsigned NumArgs = OnlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size();
|
|
|
|
for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(); ArgNo < NumArgs;
|
|
|
|
++I, ++ArgNo) {
|
|
|
|
assert(I != FI.arg_end());
|
|
|
|
QualType ArgType = I->type;
|
|
|
|
const ABIArgInfo &AI = I->info;
|
|
|
|
// Collect data about IR arguments corresponding to Clang argument ArgNo.
|
|
|
|
auto &IRArgs = ArgInfo[ArgNo];
|
|
|
|
|
|
|
|
if (AI.getPaddingType())
|
|
|
|
IRArgs.PaddingArgIndex = IRArgNo++;
|
|
|
|
|
|
|
|
switch (AI.getKind()) {
|
|
|
|
case ABIArgInfo::Extend:
|
|
|
|
case ABIArgInfo::Direct: {
|
|
|
|
// FIXME: handle sseregparm someday...
|
|
|
|
llvm::StructType *STy = dyn_cast<llvm::StructType>(AI.getCoerceToType());
|
|
|
|
if (AI.isDirect() && AI.getCanBeFlattened() && STy) {
|
|
|
|
IRArgs.NumberOfArgs = STy->getNumElements();
|
|
|
|
} else {
|
|
|
|
IRArgs.NumberOfArgs = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case ABIArgInfo::Indirect:
|
|
|
|
IRArgs.NumberOfArgs = 1;
|
|
|
|
break;
|
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
case ABIArgInfo::InAlloca:
|
|
|
|
// ignore and inalloca doesn't have matching LLVM parameters.
|
|
|
|
IRArgs.NumberOfArgs = 0;
|
|
|
|
break;
|
|
|
|
case ABIArgInfo::Expand: {
|
|
|
|
IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IRArgs.NumberOfArgs > 0) {
|
|
|
|
IRArgs.FirstArgIndex = IRArgNo;
|
|
|
|
IRArgNo += IRArgs.NumberOfArgs;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip over the sret parameter when it comes second. We already handled it
|
|
|
|
// above.
|
|
|
|
if (IRArgNo == 1 && SwapThisWithSRet)
|
|
|
|
IRArgNo++;
|
|
|
|
}
|
|
|
|
assert(ArgNo == ArgInfo.size());
|
|
|
|
|
|
|
|
if (FI.usesInAlloca())
|
|
|
|
InallocaArgNo = IRArgNo++;
|
|
|
|
|
|
|
|
TotalIRArgs = IRArgNo;
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
/***/
|
|
|
|
|
2010-07-15 07:39:36 +08:00
|
|
|
bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
|
2009-02-05 16:00:50 +08:00
|
|
|
return FI.getReturnInfo().isIndirect();
|
2009-02-03 05:43:58 +08:00
|
|
|
}
|
|
|
|
|
2014-03-29 21:28:05 +08:00
|
|
|
bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
|
|
|
|
return ReturnTypeUsesSRet(FI) &&
|
|
|
|
getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs();
|
|
|
|
}
|
|
|
|
|
2010-07-15 07:39:36 +08:00
|
|
|
bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
|
|
|
|
if (const BuiltinType *BT = ResultType->getAs<BuiltinType>()) {
|
|
|
|
switch (BT->getKind()) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case BuiltinType::Float:
|
2013-04-17 06:48:15 +08:00
|
|
|
return getTarget().useObjCFPRetForRealType(TargetInfo::Float);
|
2010-07-15 07:39:36 +08:00
|
|
|
case BuiltinType::Double:
|
2013-04-17 06:48:15 +08:00
|
|
|
return getTarget().useObjCFPRetForRealType(TargetInfo::Double);
|
2010-07-15 07:39:36 +08:00
|
|
|
case BuiltinType::LongDouble:
|
2013-04-17 06:48:15 +08:00
|
|
|
return getTarget().useObjCFPRetForRealType(TargetInfo::LongDouble);
|
2010-07-15 07:39:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-11-01 00:27:11 +08:00
|
|
|
bool CodeGenModule::ReturnTypeUsesFP2Ret(QualType ResultType) {
|
|
|
|
if (const ComplexType *CT = ResultType->getAs<ComplexType>()) {
|
|
|
|
if (const BuiltinType *BT = CT->getElementType()->getAs<BuiltinType>()) {
|
|
|
|
if (BT->getKind() == BuiltinType::LongDouble)
|
2013-04-17 06:48:15 +08:00
|
|
|
return getTarget().useObjCFP2RetForComplexLongDouble();
|
2011-11-01 00:27:11 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::FunctionType *CodeGenTypes::GetFunctionType(GlobalDecl GD) {
|
2012-02-17 11:33:10 +08:00
|
|
|
const CGFunctionInfo &FI = arrangeGlobalDeclaration(GD);
|
|
|
|
return GetFunctionType(FI);
|
2010-02-23 08:48:20 +08:00
|
|
|
}
|
|
|
|
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::FunctionType *
|
2012-02-17 11:33:10 +08:00
|
|
|
CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
|
2014-09-30 06:08:00 +08:00
|
|
|
|
2014-11-19 15:49:47 +08:00
|
|
|
bool Inserted = FunctionsBeingProcessed.insert(&FI).second;
|
|
|
|
(void)Inserted;
|
2011-07-15 13:16:14 +08:00
|
|
|
assert(Inserted && "Recursively being processed?");
|
2008-09-10 12:01:49 +08:00
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
llvm::Type *resultType = nullptr;
|
2011-05-15 10:19:42 +08:00
|
|
|
const ABIArgInfo &retAI = FI.getReturnInfo();
|
|
|
|
switch (retAI.getKind()) {
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-05-15 10:19:42 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2011-05-15 10:19:42 +08:00
|
|
|
resultType = retAI.getCoerceToType();
|
2009-02-03 14:17:37 +08:00
|
|
|
break;
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca:
|
2014-02-25 08:59:14 +08:00
|
|
|
if (retAI.getInAllocaSRet()) {
|
|
|
|
// sret things on win32 aren't void, they return the sret pointer.
|
|
|
|
QualType ret = FI.getReturnType();
|
|
|
|
llvm::Type *ty = ConvertType(ret);
|
|
|
|
unsigned addressSpace = Context.getTargetAddressSpace(ret);
|
|
|
|
resultType = llvm::PointerType::get(ty, addressSpace);
|
|
|
|
} else {
|
|
|
|
resultType = llvm::Type::getVoidTy(getLLVMContext());
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
break;
|
|
|
|
|
2009-02-05 16:00:50 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
2011-05-15 10:19:42 +08:00
|
|
|
assert(!retAI.getIndirectAlign() && "Align unused on indirect return.");
|
|
|
|
resultType = llvm::Type::getVoidTy(getLLVMContext());
|
2008-09-10 12:01:49 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2011-05-15 10:19:42 +08:00
|
|
|
resultType = llvm::Type::getVoidTy(getLLVMContext());
|
2009-01-27 05:26:08 +08:00
|
|
|
break;
|
2008-09-10 12:01:49 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true);
|
|
|
|
SmallVector<llvm::Type*, 8> ArgTypes(IRFunctionArgs.totalIRArgs());
|
|
|
|
|
|
|
|
// Add type for sret argument.
|
|
|
|
if (IRFunctionArgs.hasSRetArg()) {
|
|
|
|
QualType Ret = FI.getReturnType();
|
|
|
|
llvm::Type *Ty = ConvertType(Ret);
|
|
|
|
unsigned AddressSpace = Context.getTargetAddressSpace(Ret);
|
|
|
|
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
|
|
|
|
llvm::PointerType::get(Ty, AddressSpace);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add type for inalloca argument.
|
|
|
|
if (IRFunctionArgs.hasInallocaArg()) {
|
|
|
|
auto ArgStruct = FI.getArgStruct();
|
|
|
|
assert(ArgStruct);
|
|
|
|
ArgTypes[IRFunctionArgs.getInallocaArgNo()] = ArgStruct->getPointerTo();
|
|
|
|
}
|
|
|
|
|
Fix the required args count for variadic blocks.
We were emitting calls to blocks as if all arguments were
required --- i.e. with signature (A,B,C,D,...) rather than
(A,B,...). This patch fixes that and accounts for the
implicit block-context argument as a required argument.
In addition, this patch changes the function type under which
we call unprototyped functions on platforms like x86-64 that
guarantee compatibility of variadic functions with unprototyped
function types; previously we would always call such functions
under the LLVM type T (...)*, but now we will call them under
the type T (A,B,C,D,...)*. This last change should have no
material effect except for making the type conventions more
explicit; it was a side-effect of the most convenient implementation.
llvm-svn: 169588
2012-12-07 15:03:17 +08:00
|
|
|
// Add in all of the required arguments.
|
2014-09-30 06:08:00 +08:00
|
|
|
unsigned ArgNo = 0;
|
2014-09-30 05:21:48 +08:00
|
|
|
CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
|
|
|
|
ie = it + FI.getNumRequiredArgs();
|
2014-09-30 06:08:00 +08:00
|
|
|
for (; it != ie; ++it, ++ArgNo) {
|
|
|
|
const ABIArgInfo &ArgInfo = it->info;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-10-24 09:59:00 +08:00
|
|
|
// Insert a padding type to ensure proper alignment.
|
2014-09-30 06:08:00 +08:00
|
|
|
if (IRFunctionArgs.hasPaddingArg(ArgNo))
|
|
|
|
ArgTypes[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
|
|
|
|
ArgInfo.getPaddingType();
|
|
|
|
|
|
|
|
unsigned FirstIRArg, NumIRArgs;
|
|
|
|
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
|
2012-10-24 09:59:00 +08:00
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
switch (ArgInfo.getKind()) {
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca:
|
2014-09-30 06:08:00 +08:00
|
|
|
assert(NumIRArgs == 0);
|
2009-01-27 05:26:08 +08:00
|
|
|
break;
|
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
2014-09-30 06:08:00 +08:00
|
|
|
assert(NumIRArgs == 1);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// indirect arguments are always on the stack, which is addr space #0.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *LTy = ConvertTypeForMem(it->type);
|
2014-09-30 06:08:00 +08:00
|
|
|
ArgTypes[FirstIRArg] = LTy->getPointerTo();
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case ABIArgInfo::Extend:
|
2010-07-29 14:44:09 +08:00
|
|
|
case ABIArgInfo::Direct: {
|
2014-08-27 18:43:15 +08:00
|
|
|
// Fast-isel and the optimizer generally like scalar values better than
|
|
|
|
// FCAs, so we flatten them if this is safe to do for this argument.
|
2014-09-30 06:08:00 +08:00
|
|
|
llvm::Type *argType = ArgInfo.getCoerceToType();
|
2014-05-10 00:21:39 +08:00
|
|
|
llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
|
2014-09-30 06:08:00 +08:00
|
|
|
if (st && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
|
|
|
|
assert(NumIRArgs == st->getNumElements());
|
2011-05-15 10:19:42 +08:00
|
|
|
for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
|
2014-09-30 06:08:00 +08:00
|
|
|
ArgTypes[FirstIRArg + i] = st->getElementType(i);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
} else {
|
2014-09-30 06:08:00 +08:00
|
|
|
assert(NumIRArgs == 1);
|
|
|
|
ArgTypes[FirstIRArg] = argType;
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2009-02-04 03:12:28 +08:00
|
|
|
break;
|
2010-07-29 14:44:09 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2014-09-30 06:08:00 +08:00
|
|
|
auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
|
|
|
|
getExpandedTypes(it->type, ArgTypesIter);
|
|
|
|
assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs);
|
2008-09-11 09:48:57 +08:00
|
|
|
break;
|
|
|
|
}
|
2008-09-10 12:01:49 +08:00
|
|
|
}
|
|
|
|
|
2011-07-15 13:16:14 +08:00
|
|
|
bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
|
|
|
|
assert(Erased && "Not in set?");
|
2014-09-30 06:08:00 +08:00
|
|
|
|
|
|
|
return llvm::FunctionType::get(resultType, ArgTypes, FI.isVariadic());
|
2008-09-10 07:48:28 +08:00
|
|
|
}
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
|
2010-08-31 15:33:07 +08:00
|
|
|
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
|
2009-11-24 13:08:52 +08:00
|
|
|
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-10 08:18:59 +08:00
|
|
|
if (!isFuncTypeConvertible(FPT))
|
|
|
|
return llvm::StructType::get(getLLVMContext());
|
|
|
|
|
|
|
|
const CGFunctionInfo *Info;
|
|
|
|
if (isa<CXXDestructorDecl>(MD))
|
2014-09-09 00:01:27 +08:00
|
|
|
Info =
|
|
|
|
&arrangeCXXStructorDeclaration(MD, getFromDtorType(GD.getDtorType()));
|
2011-07-10 08:18:59 +08:00
|
|
|
else
|
2012-02-17 11:33:10 +08:00
|
|
|
Info = &arrangeCXXMethodDeclaration(MD);
|
|
|
|
return GetFunctionType(*Info);
|
2009-11-24 13:08:52 +08:00
|
|
|
}
|
|
|
|
|
2009-02-03 07:43:58 +08:00
|
|
|
void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
|
2009-02-03 06:03:45 +08:00
|
|
|
const Decl *TargetDecl,
|
2010-10-19 14:39:39 +08:00
|
|
|
AttributeListType &PAL,
|
2013-02-22 08:13:35 +08:00
|
|
|
unsigned &CallingConv,
|
|
|
|
bool AttrOnCallSite) {
|
2012-10-16 04:36:26 +08:00
|
|
|
llvm::AttrBuilder FuncAttrs;
|
|
|
|
llvm::AttrBuilder RetAttrs;
|
2014-12-12 04:14:04 +08:00
|
|
|
bool HasOptnone = false;
|
2008-09-10 08:32:18 +08:00
|
|
|
|
2009-09-12 08:59:20 +08:00
|
|
|
CallingConv = FI.getEffectiveCallingConvention();
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
if (FI.isNoReturn())
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
|
2010-02-06 05:31:56 +08:00
|
|
|
|
2009-04-04 08:49:24 +08:00
|
|
|
// FIXME: handle sseregparm someday...
|
2008-09-10 08:32:18 +08:00
|
|
|
if (TargetDecl) {
|
2011-10-13 03:51:18 +08:00
|
|
|
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::ReturnsTwice);
|
2009-06-30 10:34:44 +08:00
|
|
|
if (TargetDecl->hasAttr<NoThrowAttr>())
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
2013-01-30 13:45:05 +08:00
|
|
|
if (TargetDecl->hasAttr<NoReturnAttr>())
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
|
2014-02-23 00:59:24 +08:00
|
|
|
if (TargetDecl->hasAttr<NoDuplicateAttr>())
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
|
2013-01-30 13:45:05 +08:00
|
|
|
|
|
|
|
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
|
2010-07-08 14:48:12 +08:00
|
|
|
const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>();
|
2011-03-14 01:09:40 +08:00
|
|
|
if (FPT && FPT->isNothrow(getContext()))
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
2013-03-05 16:30:04 +08:00
|
|
|
// Don't use [[noreturn]] or _Noreturn for a call to a virtual function.
|
|
|
|
// These attributes are not inherited by overloads.
|
|
|
|
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn);
|
|
|
|
if (Fn->isNoReturn() && !(AttrOnCallSite && MD && MD->isVirtual()))
|
2013-01-30 13:45:05 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
|
2010-07-08 14:48:12 +08:00
|
|
|
}
|
|
|
|
|
2011-08-16 06:38:22 +08:00
|
|
|
// 'const' and 'pure' attribute functions are also nounwind.
|
|
|
|
if (TargetDecl->hasAttr<ConstAttr>()) {
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::ReadNone);
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
2011-08-16 06:38:22 +08:00
|
|
|
} else if (TargetDecl->hasAttr<PureAttr>()) {
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
|
2011-08-16 06:38:22 +08:00
|
|
|
}
|
2015-02-04 15:23:21 +08:00
|
|
|
if (TargetDecl->hasAttr<RestrictAttr>())
|
2012-12-21 03:27:06 +08:00
|
|
|
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
|
2014-07-12 12:51:04 +08:00
|
|
|
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
|
|
|
|
RetAttrs.addAttribute(llvm::Attribute::NonNull);
|
2014-12-12 04:14:04 +08:00
|
|
|
|
|
|
|
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
|
|
|
|
}
|
|
|
|
|
|
|
|
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
|
|
|
|
if (!HasOptnone) {
|
|
|
|
if (CodeGenOpts.OptimizeSize)
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
|
|
|
|
if (CodeGenOpts.OptimizeSize == 2)
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::MinSize);
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
|
|
|
|
2009-11-13 01:24:48 +08:00
|
|
|
if (CodeGenOpts.DisableRedZone)
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
|
2009-11-13 01:24:48 +08:00
|
|
|
if (CodeGenOpts.NoImplicitFloat)
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
|
2014-05-20 06:14:34 +08:00
|
|
|
if (CodeGenOpts.EnableSegmentedStacks &&
|
|
|
|
!(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
|
2014-04-11 06:59:13 +08:00
|
|
|
FuncAttrs.addAttribute("split-stack");
|
2009-06-05 07:32:02 +08:00
|
|
|
|
2013-02-23 04:53:29 +08:00
|
|
|
if (AttrOnCallSite) {
|
|
|
|
// Attributes that should go on the call site only.
|
|
|
|
if (!CodeGenOpts.SimplifyLibCalls)
|
|
|
|
FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
|
2013-03-01 06:49:57 +08:00
|
|
|
} else {
|
|
|
|
// Attributes that should go on the function, but not the call site.
|
|
|
|
if (!CodeGenOpts.DisableFPElim) {
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
|
2013-03-01 06:49:57 +08:00
|
|
|
} else if (CodeGenOpts.OmitLeafFramePointer) {
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
|
2013-08-23 05:16:51 +08:00
|
|
|
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
|
2013-03-01 06:49:57 +08:00
|
|
|
} else {
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
|
2013-08-23 05:16:51 +08:00
|
|
|
FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
|
2013-03-01 06:49:57 +08:00
|
|
|
}
|
|
|
|
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("less-precise-fpmad",
|
2013-07-27 05:51:11 +08:00
|
|
|
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("no-infs-fp-math",
|
2013-07-27 05:51:11 +08:00
|
|
|
llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("no-nans-fp-math",
|
2013-07-27 05:51:11 +08:00
|
|
|
llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("unsafe-fp-math",
|
2013-07-27 05:51:11 +08:00
|
|
|
llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
|
2013-03-14 06:24:33 +08:00
|
|
|
FuncAttrs.addAttribute("use-soft-float",
|
2013-07-27 05:51:11 +08:00
|
|
|
llvm::toStringRef(CodeGenOpts.SoftFloat));
|
2013-07-23 04:15:41 +08:00
|
|
|
FuncAttrs.addAttribute("stack-protector-buffer-size",
|
2013-07-13 06:26:07 +08:00
|
|
|
llvm::utostr(CodeGenOpts.SSPBufferSize));
|
2013-07-25 08:32:41 +08:00
|
|
|
|
2013-08-02 05:41:02 +08:00
|
|
|
if (!CodeGenOpts.StackRealignment)
|
|
|
|
FuncAttrs.addAttribute("no-realign-stack");
|
2013-02-16 05:30:01 +08:00
|
|
|
}
|
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
|
2009-02-03 07:43:58 +08:00
|
|
|
QualType RetTy = FI.getReturnType();
|
2009-02-03 13:59:18 +08:00
|
|
|
const ABIArgInfo &RetAI = FI.getReturnInfo();
|
2008-09-10 12:01:49 +08:00
|
|
|
switch (RetAI.getKind()) {
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2013-05-29 11:57:23 +08:00
|
|
|
if (RetTy->hasSignedIntegerRepresentation())
|
|
|
|
RetAttrs.addAttribute(llvm::Attribute::SExt);
|
|
|
|
else if (RetTy->hasUnsignedIntegerRepresentation())
|
|
|
|
RetAttrs.addAttribute(llvm::Attribute::ZExt);
|
2013-06-05 11:00:09 +08:00
|
|
|
// FALL THROUGH
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2013-06-05 11:00:09 +08:00
|
|
|
if (RetAI.getInReg())
|
|
|
|
RetAttrs.addAttribute(llvm::Attribute::InReg);
|
|
|
|
break;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2008-09-10 10:41:04 +08:00
|
|
|
break;
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
case ABIArgInfo::InAlloca:
|
2012-07-31 10:44:24 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
// inalloca and sret disable readnone and readonly
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
|
|
|
|
.removeAttribute(llvm::Attribute::ReadNone);
|
2008-09-10 10:41:04 +08:00
|
|
|
break;
|
2012-07-31 10:44:24 +08:00
|
|
|
}
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2014-07-18 23:52:10 +08:00
|
|
|
if (const auto *RefTy = RetTy->getAs<ReferenceType>()) {
|
|
|
|
QualType PTy = RefTy->getPointeeType();
|
|
|
|
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
|
|
|
|
RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
|
|
|
|
.getQuantity());
|
|
|
|
else if (getContext().getTargetAddressSpace(PTy) == 0)
|
|
|
|
RetAttrs.addAttribute(llvm::Attribute::NonNull);
|
|
|
|
}
|
2014-05-28 17:56:42 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
// Attach return attributes.
|
|
|
|
if (RetAttrs.hasAttributes()) {
|
|
|
|
PAL.push_back(llvm::AttributeSet::get(
|
|
|
|
getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs));
|
|
|
|
}
|
2009-04-04 08:49:24 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
// Attach attributes to sret.
|
|
|
|
if (IRFunctionArgs.hasSRetArg()) {
|
|
|
|
llvm::AttrBuilder SRETAttrs;
|
|
|
|
SRETAttrs.addAttribute(llvm::Attribute::StructRet);
|
|
|
|
if (RetAI.getInReg())
|
|
|
|
SRETAttrs.addAttribute(llvm::Attribute::InReg);
|
|
|
|
PAL.push_back(llvm::AttributeSet::get(
|
|
|
|
getLLVMContext(), IRFunctionArgs.getSRetArgNo() + 1, SRETAttrs));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attach attributes to inalloca argument.
|
|
|
|
if (IRFunctionArgs.hasInallocaArg()) {
|
2012-10-16 04:36:26 +08:00
|
|
|
llvm::AttrBuilder Attrs;
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::InAlloca);
|
|
|
|
PAL.push_back(llvm::AttributeSet::get(
|
|
|
|
getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs));
|
|
|
|
}
|
2009-04-04 08:49:24 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
unsigned ArgNo = 0;
|
|
|
|
for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(),
|
|
|
|
E = FI.arg_end();
|
|
|
|
I != E; ++I, ++ArgNo) {
|
|
|
|
QualType ParamType = I->type;
|
|
|
|
const ABIArgInfo &AI = I->info;
|
|
|
|
llvm::AttrBuilder Attrs;
|
|
|
|
|
|
|
|
// Add attribute for padding argument, if necessary.
|
|
|
|
if (IRFunctionArgs.hasPaddingArg(ArgNo)) {
|
2013-01-27 10:46:53 +08:00
|
|
|
if (AI.getPaddingInReg())
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
PAL.push_back(llvm::AttributeSet::get(
|
|
|
|
getLLVMContext(), IRFunctionArgs.getPaddingArgNo(ArgNo) + 1,
|
|
|
|
llvm::Attribute::InReg));
|
2012-10-24 09:59:00 +08:00
|
|
|
}
|
|
|
|
|
2010-03-27 08:47:27 +08:00
|
|
|
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
|
|
|
|
// have the corresponding parameter variable. It doesn't make
|
2011-02-11 02:10:07 +08:00
|
|
|
// sense to do it here because parameters are so messed up.
|
2008-09-11 09:48:57 +08:00
|
|
|
switch (AI.getKind()) {
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2011-05-21 00:38:50 +08:00
|
|
|
if (ParamType->isSignedIntegerOrEnumerationType())
|
2012-12-21 03:27:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::SExt);
|
2011-05-21 00:38:50 +08:00
|
|
|
else if (ParamType->isUnsignedIntegerOrEnumerationType())
|
2012-12-21 03:27:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::ZExt);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// FALL THROUGH
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2014-12-13 07:41:25 +08:00
|
|
|
if (ArgNo == 0 && FI.isChainCall())
|
|
|
|
Attrs.addAttribute(llvm::Attribute::Nest);
|
|
|
|
else if (AI.getInReg())
|
2012-12-21 03:27:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::InReg);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Indirect:
|
2012-10-19 13:04:37 +08:00
|
|
|
if (AI.getInReg())
|
2012-12-21 03:27:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::InReg);
|
2012-10-19 13:04:37 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (AI.getIndirectByVal())
|
2012-12-21 03:27:06 +08:00
|
|
|
Attrs.addAttribute(llvm::Attribute::ByVal);
|
2012-10-10 15:36:56 +08:00
|
|
|
|
|
|
|
Attrs.addAlignmentAttr(AI.getIndirectAlign());
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
|
|
|
// byval disables readnone and readonly.
|
2012-12-21 03:27:06 +08:00
|
|
|
FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
|
|
|
|
.removeAttribute(llvm::Attribute::ReadNone);
|
2008-09-11 09:48:57 +08:00
|
|
|
break;
|
2009-04-04 08:49:24 +08:00
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2009-09-09 23:08:12 +08:00
|
|
|
continue;
|
2009-01-27 05:26:08 +08:00
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca:
|
|
|
|
// inalloca disables readnone and readonly.
|
|
|
|
FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
|
|
|
|
.removeAttribute(llvm::Attribute::ReadNone);
|
|
|
|
continue;
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-07-18 23:52:10 +08:00
|
|
|
if (const auto *RefTy = ParamType->getAs<ReferenceType>()) {
|
|
|
|
QualType PTy = RefTy->getPointeeType();
|
|
|
|
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
|
|
|
|
Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy)
|
|
|
|
.getQuantity());
|
|
|
|
else if (getContext().getTargetAddressSpace(PTy) == 0)
|
|
|
|
Attrs.addAttribute(llvm::Attribute::NonNull);
|
|
|
|
}
|
2014-05-28 17:56:42 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
if (Attrs.hasAttributes()) {
|
|
|
|
unsigned FirstIRArg, NumIRArgs;
|
|
|
|
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
|
|
|
|
for (unsigned i = 0; i < NumIRArgs; i++)
|
|
|
|
PAL.push_back(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
FirstIRArg + i + 1, Attrs));
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(ArgNo == FI.arg_size());
|
2014-02-01 08:04:45 +08:00
|
|
|
|
2012-10-10 15:36:56 +08:00
|
|
|
if (FuncAttrs.hasAttributes())
|
2012-10-15 15:31:59 +08:00
|
|
|
PAL.push_back(llvm::
|
2013-01-27 10:46:53 +08:00
|
|
|
AttributeSet::get(getLLVMContext(),
|
|
|
|
llvm::AttributeSet::FunctionIndex,
|
|
|
|
FuncAttrs));
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
/// An argument came in as a promoted argument; demote it back to its
|
|
|
|
/// declared type.
|
|
|
|
static llvm::Value *emitArgumentDemotion(CodeGenFunction &CGF,
|
|
|
|
const VarDecl *var,
|
|
|
|
llvm::Value *value) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *varType = CGF.ConvertType(var->getType());
|
2011-03-09 12:27:21 +08:00
|
|
|
|
|
|
|
// This can happen with promotions that actually don't change the
|
|
|
|
// underlying type, like the enum promotions.
|
|
|
|
if (value->getType() == varType) return value;
|
|
|
|
|
|
|
|
assert((varType->isIntegerTy() || varType->isFloatingPointTy())
|
|
|
|
&& "unexpected promotion type");
|
|
|
|
|
|
|
|
if (isa<llvm::IntegerType>(varType))
|
|
|
|
return CGF.Builder.CreateTrunc(value, varType, "arg.unpromote");
|
|
|
|
|
|
|
|
return CGF.Builder.CreateFPCast(value, varType, "arg.unpromote");
|
|
|
|
}
|
|
|
|
|
2014-09-09 01:22:45 +08:00
|
|
|
/// Returns the attribute (either parameter attribute, or function
|
|
|
|
/// attribute), which declares argument ArgNo to be non-null.
|
|
|
|
static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD,
|
|
|
|
QualType ArgType, unsigned ArgNo) {
|
2014-08-28 08:53:20 +08:00
|
|
|
// FIXME: __attribute__((nonnull)) can also be applied to:
|
|
|
|
// - references to pointers, where the pointee is known to be
|
|
|
|
// nonnull (apparently a Clang extension)
|
|
|
|
// - transparent unions containing pointers
|
|
|
|
// In the former case, LLVM IR cannot represent the constraint. In
|
|
|
|
// the latter case, we have no guarantee that the transparent union
|
|
|
|
// is in fact passed as a pointer.
|
2014-09-09 01:22:45 +08:00
|
|
|
if (!ArgType->isAnyPointerType() && !ArgType->isBlockPointerType())
|
|
|
|
return nullptr;
|
2014-08-28 08:53:20 +08:00
|
|
|
// First, check attribute on parameter itself.
|
2014-09-09 01:22:45 +08:00
|
|
|
if (PVD) {
|
|
|
|
if (auto ParmNNAttr = PVD->getAttr<NonNullAttr>())
|
|
|
|
return ParmNNAttr;
|
|
|
|
}
|
2014-08-28 08:53:20 +08:00
|
|
|
// Check function attributes.
|
|
|
|
if (!FD)
|
2014-09-09 01:22:45 +08:00
|
|
|
return nullptr;
|
2014-08-28 08:53:20 +08:00
|
|
|
for (const auto *NNAttr : FD->specific_attrs<NonNullAttr>()) {
|
2014-09-09 01:22:45 +08:00
|
|
|
if (NNAttr->isNonNull(ArgNo))
|
|
|
|
return NNAttr;
|
2014-08-28 08:53:20 +08:00
|
|
|
}
|
2014-09-09 01:22:45 +08:00
|
|
|
return nullptr;
|
2014-08-28 08:53:20 +08:00
|
|
|
}
|
|
|
|
|
2009-02-03 06:03:45 +08:00
|
|
|
void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
|
|
|
llvm::Function *Fn,
|
2008-09-10 07:27:19 +08:00
|
|
|
const FunctionArgList &Args) {
|
2014-09-05 06:16:33 +08:00
|
|
|
if (CurCodeDecl && CurCodeDecl->hasAttr<NakedAttr>())
|
|
|
|
// Naked functions don't have prologues.
|
|
|
|
return;
|
|
|
|
|
2009-07-28 09:00:58 +08:00
|
|
|
// If this is an implicit-return-zero function, go ahead and
|
|
|
|
// initialize the return value. TODO: it might be nice to have
|
|
|
|
// a more general mechanism for this that didn't require synthesized
|
|
|
|
// return statements.
|
2013-05-03 15:33:41 +08:00
|
|
|
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl)) {
|
2009-07-28 09:00:58 +08:00
|
|
|
if (FD->hasImplicitReturnZero()) {
|
2014-01-26 00:55:45 +08:00
|
|
|
QualType RetTy = FD->getReturnType().getUnqualifiedType();
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy);
|
2009-08-01 04:28:54 +08:00
|
|
|
llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy);
|
2009-07-28 09:00:58 +08:00
|
|
|
Builder.CreateStore(Zero, ReturnValue);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: We no longer need the types from FunctionArgList; lift up and
|
|
|
|
// simplify.
|
2009-02-03 14:02:10 +08:00
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
// Flattened function arguments.
|
|
|
|
SmallVector<llvm::Argument *, 16> FnArgs;
|
|
|
|
FnArgs.reserve(IRFunctionArgs.totalIRArgs());
|
|
|
|
for (auto &Arg : Fn->args()) {
|
|
|
|
FnArgs.push_back(&Arg);
|
|
|
|
}
|
|
|
|
assert(FnArgs.size() == IRFunctionArgs.totalIRArgs());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
// If we're using inalloca, all the memory arguments are GEPs off of the last
|
|
|
|
// parameter, which is a pointer to the complete memory area.
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::Value *ArgStruct = nullptr;
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
if (IRFunctionArgs.hasInallocaArg()) {
|
|
|
|
ArgStruct = FnArgs[IRFunctionArgs.getInallocaArgNo()];
|
2014-02-01 08:04:45 +08:00
|
|
|
assert(ArgStruct->getType() == FI.getArgStruct()->getPointerTo());
|
|
|
|
}
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
// Name the struct return parameter.
|
|
|
|
if (IRFunctionArgs.hasSRetArg()) {
|
|
|
|
auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()];
|
2008-09-10 07:27:19 +08:00
|
|
|
AI->setName("agg.result");
|
2014-05-10 06:46:15 +08:00
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
|
2013-01-23 14:15:10 +08:00
|
|
|
llvm::Attribute::NoAlias));
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
// Track if we received the parameter as a pointer (indirect, byval, or
|
|
|
|
// inalloca). If already have a pointer, EmitParmDecl doesn't need to copy it
|
|
|
|
// into a local alloca for us.
|
|
|
|
enum ValOrPointer { HaveValue = 0, HavePointer = 1 };
|
2014-02-01 08:23:22 +08:00
|
|
|
typedef llvm::PointerIntPair<llvm::Value *, 1> ValueAndIsPtr;
|
2014-02-01 08:04:45 +08:00
|
|
|
SmallVector<ValueAndIsPtr, 16> ArgVals;
|
|
|
|
ArgVals.reserve(Args.size());
|
|
|
|
|
2013-12-05 03:23:12 +08:00
|
|
|
// Create a pointer value for every parameter declaration. This usually
|
|
|
|
// entails copying one or more LLVM IR arguments into an alloca. Don't push
|
|
|
|
// any cleanups or do anything that might unwind. We do that separately, so
|
|
|
|
// we can push the cleanups in the correct order for the ABI.
|
2009-02-05 05:17:21 +08:00
|
|
|
assert(FI.arg_size() == Args.size() &&
|
|
|
|
"Mismatch between function signature & arguments.");
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
unsigned ArgNo = 0;
|
2009-02-03 13:59:18 +08:00
|
|
|
CGFunctionInfo::const_arg_iterator info_it = FI.arg_begin();
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
|
2011-03-04 04:13:15 +08:00
|
|
|
i != e; ++i, ++info_it, ++ArgNo) {
|
2011-03-09 12:27:21 +08:00
|
|
|
const VarDecl *Arg = *i;
|
2009-02-03 13:59:18 +08:00
|
|
|
QualType Ty = info_it->type;
|
|
|
|
const ABIArgInfo &ArgI = info_it->info;
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
bool isPromoted =
|
|
|
|
isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
unsigned FirstIRArg, NumIRArgs;
|
|
|
|
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
|
2012-10-24 09:59:00 +08:00
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
switch (ArgI.getKind()) {
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca: {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 0);
|
2014-02-01 08:04:45 +08:00
|
|
|
llvm::Value *V = Builder.CreateStructGEP(
|
|
|
|
ArgStruct, ArgI.getInAllocaFieldIndex(), Arg->getName());
|
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
break;
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
|
|
|
|
2009-02-05 17:16:39 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
|
|
|
llvm::Value *V = FnArgs[FirstIRArg];
|
2010-09-17 04:42:02 +08:00
|
|
|
|
2013-03-08 05:37:08 +08:00
|
|
|
if (!hasScalarEvaluationKind(Ty)) {
|
2010-09-17 04:42:02 +08:00
|
|
|
// Aggregates and complex variables are accessed by reference. All we
|
|
|
|
// need to do is realign the value, if requested
|
|
|
|
if (ArgI.getIndirectRealign()) {
|
|
|
|
llvm::Value *AlignedTemp = CreateMemTemp(Ty, "coerce");
|
|
|
|
|
|
|
|
// Copy from the incoming argument pointer to the temporary with the
|
|
|
|
// appropriate alignment.
|
|
|
|
//
|
|
|
|
// FIXME: We should have a common utility for generating an aggregate
|
|
|
|
// copy.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
|
2011-01-19 09:58:38 +08:00
|
|
|
CharUnits Size = getContext().getTypeSizeInChars(Ty);
|
2011-03-10 22:02:21 +08:00
|
|
|
llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
|
|
|
|
llvm::Value *Src = Builder.CreateBitCast(V, I8PtrTy);
|
|
|
|
Builder.CreateMemCpy(Dst,
|
|
|
|
Src,
|
2011-01-19 09:58:38 +08:00
|
|
|
llvm::ConstantInt::get(IntPtrTy,
|
|
|
|
Size.getQuantity()),
|
2010-12-30 08:13:21 +08:00
|
|
|
ArgI.getIndirectAlign(),
|
|
|
|
false);
|
2010-09-17 04:42:02 +08:00
|
|
|
V = AlignedTemp;
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
|
2009-02-05 17:16:39 +08:00
|
|
|
} else {
|
|
|
|
// Load scalar value from indirect argument.
|
2011-01-19 09:58:38 +08:00
|
|
|
CharUnits Alignment = getContext().getTypeAlignInChars(Ty);
|
2013-10-02 10:29:49 +08:00
|
|
|
V = EmitLoadOfScalar(V, false, Alignment.getQuantity(), Ty,
|
|
|
|
Arg->getLocStart());
|
2011-03-09 12:27:21 +08:00
|
|
|
|
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
2014-02-01 08:04:45 +08:00
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
|
2009-02-05 17:16:39 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2009-06-06 17:36:29 +08:00
|
|
|
|
|
|
|
case ABIArgInfo::Extend:
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct: {
|
2012-01-10 03:08:06 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// If we have the trivial case, handle it with no muss and fuss.
|
|
|
|
if (!isa<llvm::StructType>(ArgI.getCoerceToType()) &&
|
2010-07-30 12:02:24 +08:00
|
|
|
ArgI.getCoerceToType() == ConvertType(Ty) &&
|
|
|
|
ArgI.getDirectOffset() == 0) {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
|
|
|
auto AI = FnArgs[FirstIRArg];
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
llvm::Value *V = AI;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-07-19 09:41:07 +08:00
|
|
|
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
|
2014-09-09 01:22:45 +08:00
|
|
|
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
|
|
|
|
PVD->getFunctionScopeIndex()))
|
2014-07-12 01:35:21 +08:00
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1,
|
|
|
|
llvm::Attribute::NonNull));
|
|
|
|
|
2014-07-19 09:41:07 +08:00
|
|
|
QualType OTy = PVD->getOriginalType();
|
|
|
|
if (const auto *ArrTy =
|
|
|
|
getContext().getAsConstantArrayType(OTy)) {
|
|
|
|
// A C99 array parameter declaration with the static keyword also
|
|
|
|
// indicates dereferenceability, and if the size is constant we can
|
|
|
|
// use the dereferenceable attribute (which requires the size in
|
|
|
|
// bytes).
|
2014-07-19 10:13:40 +08:00
|
|
|
if (ArrTy->getSizeModifier() == ArrayType::Static) {
|
2014-07-19 09:41:07 +08:00
|
|
|
QualType ETy = ArrTy->getElementType();
|
|
|
|
uint64_t ArrSize = ArrTy->getSize().getZExtValue();
|
|
|
|
if (!ETy->isIncompleteType() && ETy->isConstantSizeType() &&
|
|
|
|
ArrSize) {
|
|
|
|
llvm::AttrBuilder Attrs;
|
|
|
|
Attrs.addDereferenceableAttr(
|
|
|
|
getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize);
|
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1, Attrs));
|
|
|
|
} else if (getContext().getTargetAddressSpace(ETy) == 0) {
|
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1,
|
|
|
|
llvm::Attribute::NonNull));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (const auto *ArrTy =
|
|
|
|
getContext().getAsVariableArrayType(OTy)) {
|
|
|
|
// For C99 VLAs with the static keyword, we don't know the size so
|
|
|
|
// we can't use the dereferenceable attribute, but in addrspace(0)
|
|
|
|
// we know that it must be nonnull.
|
|
|
|
if (ArrTy->getSizeModifier() == VariableArrayType::Static &&
|
|
|
|
!getContext().getTargetAddressSpace(ArrTy->getElementType()))
|
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1,
|
|
|
|
llvm::Attribute::NonNull));
|
|
|
|
}
|
Initial support for the align_value attribute
This adds support for the align_value attribute. This attribute is supported by
Intel's compiler (versions 14.0+), and several of my HPC users have requested
support in Clang. It specifies an alignment assumption on the values to which a
pointer points, and is used by numerical libraries to encourage efficient
generation of vector code.
Of course, we already have an aligned attribute that can specify enhanced
alignment for a type, so why is this additional attribute important? The
problem is that if you want to specify that an input array of T is, say,
64-byte aligned, you could try this:
typedef double aligned_double attribute((aligned(64)));
void foo(aligned_double *P) {
double x = P[0]; // This is fine.
double y = P[1]; // What alignment did those doubles have again?
}
the access here to P[1] causes problems. P was specified as a pointer to type
aligned_double, and any object of type aligned_double must be 64-byte aligned.
But if P[0] is 64-byte aligned, then P[1] cannot be, and this access causes
undefined behavior. Getting round this problem requires a lot of awkward
casting and hand-unrolling of loops, all of which is bad.
With the align_value attribute, we can accomplish what we'd like in a well
defined way:
typedef double *aligned_double_ptr attribute((align_value(64)));
void foo(aligned_double_ptr P) {
double x = P[0]; // This is fine.
double y = P[1]; // This is fine too.
}
This attribute does not create a new type (and so it not part of the type
system), and so will only "propagate" through templates, auto, etc. by
optimizer deduction after inlining. This seems consistent with Intel's
implementation (thanks to Alexey for confirming the various Intel-compiler
behaviors).
As a final note, I would have chosen to call this aligned_value, not
align_value, for better naming consistency with the aligned attribute, but I
think it would be more useful to users to adopt Intel's name.
llvm-svn: 218910
2014-10-03 05:21:25 +08:00
|
|
|
|
|
|
|
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
|
|
|
|
if (!AVAttr)
|
|
|
|
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
|
|
|
|
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
|
|
|
|
if (AVAttr) {
|
|
|
|
llvm::Value *AlignmentValue =
|
|
|
|
EmitScalarExpr(AVAttr->getAlignment());
|
|
|
|
llvm::ConstantInt *AlignmentCI =
|
|
|
|
cast<llvm::ConstantInt>(AlignmentValue);
|
|
|
|
unsigned Alignment =
|
|
|
|
std::min((unsigned) AlignmentCI->getZExtValue(),
|
|
|
|
+llvm::Value::MaximumAlignment);
|
|
|
|
|
|
|
|
llvm::AttrBuilder Attrs;
|
|
|
|
Attrs.addAlignmentAttr(Alignment);
|
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1, Attrs));
|
|
|
|
}
|
2014-07-19 09:41:07 +08:00
|
|
|
}
|
|
|
|
|
2012-10-16 13:23:44 +08:00
|
|
|
if (Arg->getType().isRestrictQualified())
|
2013-01-23 14:15:10 +08:00
|
|
|
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
|
|
|
|
AI->getArgNo() + 1,
|
|
|
|
llvm::Attribute::NoAlias));
|
2010-03-27 08:47:27 +08:00
|
|
|
|
fix rdar://9780211 - Clang crashes with an assertion failure building WKView.mm from WebKit
This is something of a hack, the problem is as follows:
1. we instantiate both copied of RetainPtr with the two different argument types
(an id and protocol-qualified id).
2. We refer to the ctor of one of the instantiations when introducing global "x",
this causes us to emit an llvm::Function for a prototype whose "this" has type
"RetainPtr<id<bork> >*".
3. We refer to the ctor of the other instantiation when introducing global "y",
however, because it *mangles to the same name as the other ctor* we just use
a bitcasted version of the llvm::Function we previously emitted.
4. We emit deferred declarations, causing us to emit the body of the ctor, however
the body we emit is for RetainPtr<id>, which expects its 'this' to have an IR
type of "RetainPtr<id>*".
Because of the mangling collision, we don't have this case, and explode.
This is really some sort of weird AST invariant violation or something, but hey
a bitcast makes the pain go away.
llvm-svn: 135572
2011-07-20 14:29:00 +08:00
|
|
|
// Ensure the argument is the correct type.
|
|
|
|
if (V->getType() != ArgI.getCoerceToType())
|
|
|
|
V = Builder.CreateBitCast(V, ArgI.getCoerceToType());
|
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
2012-11-30 00:09:03 +08:00
|
|
|
|
2013-10-02 05:51:38 +08:00
|
|
|
if (const CXXMethodDecl *MD =
|
|
|
|
dyn_cast_or_null<CXXMethodDecl>(CurCodeDecl)) {
|
2013-08-21 14:25:03 +08:00
|
|
|
if (MD->isVirtual() && Arg == CXXABIThisDecl)
|
2013-10-02 05:51:38 +08:00
|
|
|
V = CGM.getCXXABI().
|
|
|
|
adjustThisParameterInVirtualFunctionPrologue(*this, CurGD, V);
|
2013-08-21 14:25:03 +08:00
|
|
|
}
|
|
|
|
|
2012-11-30 00:09:03 +08:00
|
|
|
// Because of merging of function types from multiple decls it is
|
|
|
|
// possible for the type of an argument to not match the corresponding
|
|
|
|
// type in the function type. Since we are codegening the callee
|
|
|
|
// in here, add a cast to the argument type.
|
|
|
|
llvm::Type *LTy = ConvertType(Arg->getType());
|
|
|
|
if (V->getType() != LTy)
|
|
|
|
V = Builder.CreateBitCast(V, LTy);
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-02-10 17:30:15 +08:00
|
|
|
llvm::AllocaInst *Alloca = CreateMemTemp(Ty, Arg->getName());
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-29 02:24:28 +08:00
|
|
|
// The alignment we need to use is the max of the requested alignment for
|
|
|
|
// the argument plus the alignment required by our access code below.
|
2010-10-19 14:39:39 +08:00
|
|
|
unsigned AlignmentToUse =
|
2012-10-09 00:25:52 +08:00
|
|
|
CGM.getDataLayout().getABITypeAlignment(ArgI.getCoerceToType());
|
2010-07-29 02:24:28 +08:00
|
|
|
AlignmentToUse = std::max(AlignmentToUse,
|
|
|
|
(unsigned)getContext().getDeclAlign(Arg).getQuantity());
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-29 02:24:28 +08:00
|
|
|
Alloca->setAlignment(AlignmentToUse);
|
2010-07-06 04:21:00 +08:00
|
|
|
llvm::Value *V = Alloca;
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::Value *Ptr = V; // Pointer to store into.
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = ArgI.getDirectOffset()) {
|
|
|
|
Ptr = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy());
|
|
|
|
Ptr = Builder.CreateConstGEP1_32(Ptr, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
Ptr = Builder.CreateBitCast(Ptr,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(ArgI.getCoerceToType()));
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-08-27 18:43:15 +08:00
|
|
|
// Fast-isel and the optimizer generally like scalar values better than
|
|
|
|
// FCAs, so we flatten them if this is safe to do for this argument.
|
2012-02-10 17:30:15 +08:00
|
|
|
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
|
2014-08-27 18:43:15 +08:00
|
|
|
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
|
|
|
|
STy->getNumElements() > 1) {
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
|
2012-02-10 17:30:15 +08:00
|
|
|
llvm::Type *DstTy =
|
|
|
|
cast<llvm::PointerType>(Ptr->getType())->getElementType();
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
|
2012-02-10 17:30:15 +08:00
|
|
|
|
|
|
|
if (SrcSize <= DstSize) {
|
|
|
|
Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(STy->getNumElements() == NumIRArgs);
|
2012-02-10 17:30:15 +08:00
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
auto AI = FnArgs[FirstIRArg + i];
|
2012-02-10 17:30:15 +08:00
|
|
|
AI->setName(Arg->getName() + ".coerce" + Twine(i));
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(Ptr, 0, i);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
Builder.CreateStore(AI, EltPtr);
|
2012-02-10 17:30:15 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
llvm::AllocaInst *TempAlloca =
|
|
|
|
CreateTempAlloca(ArgI.getCoerceToType(), "coerce");
|
|
|
|
TempAlloca->setAlignment(AlignmentToUse);
|
|
|
|
llvm::Value *TempV = TempAlloca;
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(STy->getNumElements() == NumIRArgs);
|
2012-02-10 17:30:15 +08:00
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
auto AI = FnArgs[FirstIRArg + i];
|
2012-02-10 17:30:15 +08:00
|
|
|
AI->setName(Arg->getName() + ".coerce" + Twine(i));
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(TempV, 0, i);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
Builder.CreateStore(AI, EltPtr);
|
2012-02-10 17:30:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
Builder.CreateMemCpy(Ptr, TempV, DstSize, AlignmentToUse);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
|
|
|
} else {
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
// Simple case, just do a coerced store of the argument into the alloca.
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
|
|
|
auto AI = FnArgs[FirstIRArg];
|
2010-06-29 08:14:52 +08:00
|
|
|
AI->setName(Arg->getName() + ".coerce");
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
// Match to what EmitParmDecl is expecting for this type.
|
2013-03-08 05:37:08 +08:00
|
|
|
if (CodeGenFunction::hasScalarEvaluationKind(Ty)) {
|
2013-10-02 10:29:49 +08:00
|
|
|
V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty, Arg->getLocStart());
|
2011-03-09 12:27:21 +08:00
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
2014-02-01 08:04:45 +08:00
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HaveValue));
|
|
|
|
} else {
|
|
|
|
ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
|
2009-02-04 15:22:24 +08:00
|
|
|
}
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
break;
|
2009-02-04 03:12:28 +08:00
|
|
|
}
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
|
|
|
case ABIArgInfo::Expand: {
|
|
|
|
// If this structure was expanded into multiple arguments then
|
|
|
|
// we need to create a temporary and reconstruct it from the
|
|
|
|
// arguments.
|
2011-11-04 05:39:02 +08:00
|
|
|
llvm::AllocaInst *Alloca = CreateMemTemp(Ty);
|
2011-12-03 12:14:32 +08:00
|
|
|
CharUnits Align = getContext().getDeclAlign(Arg);
|
|
|
|
Alloca->setAlignment(Align.getQuantity());
|
|
|
|
LValue LV = MakeAddrLValue(Alloca, Ty, Align);
|
2014-02-01 08:04:45 +08:00
|
|
|
ArgVals.push_back(ValueAndIsPtr(Alloca, HavePointer));
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
auto FnArgIter = FnArgs.begin() + FirstIRArg;
|
|
|
|
ExpandTypeFromArgs(Ty, LV, FnArgIter);
|
|
|
|
assert(FnArgIter == FnArgs.begin() + FirstIRArg + NumIRArgs);
|
|
|
|
for (unsigned i = 0, e = NumIRArgs; i != e; ++i) {
|
|
|
|
auto AI = FnArgs[FirstIRArg + i];
|
|
|
|
AI->setName(Arg->getName() + "." + Twine(i));
|
|
|
|
}
|
|
|
|
break;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
case ABIArgInfo::Ignore:
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 0);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// Initialize the local variable appropriately.
|
2014-02-01 08:04:45 +08:00
|
|
|
if (!hasScalarEvaluationKind(Ty)) {
|
|
|
|
ArgVals.push_back(ValueAndIsPtr(CreateMemTemp(Ty), HavePointer));
|
|
|
|
} else {
|
|
|
|
llvm::Value *U = llvm::UndefValue::get(ConvertType(Arg->getType()));
|
|
|
|
ArgVals.push_back(ValueAndIsPtr(U, HaveValue));
|
|
|
|
}
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
break;
|
2008-09-11 09:48:57 +08:00
|
|
|
}
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
|
2013-12-05 03:23:12 +08:00
|
|
|
if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
|
|
|
|
for (int I = Args.size() - 1; I >= 0; --I)
|
2014-02-01 08:04:45 +08:00
|
|
|
EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
|
|
|
|
I + 1);
|
2013-12-05 03:23:12 +08:00
|
|
|
} else {
|
|
|
|
for (unsigned I = 0, E = Args.size(); I != E; ++I)
|
2014-02-01 08:04:45 +08:00
|
|
|
EmitParmDecl(*Args[I], ArgVals[I].getPointer(), ArgVals[I].getInt(),
|
|
|
|
I + 1);
|
2013-12-05 03:23:12 +08:00
|
|
|
}
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
|
2012-01-29 15:46:59 +08:00
|
|
|
static void eraseUnusedBitCasts(llvm::Instruction *insn) {
|
|
|
|
while (insn->use_empty()) {
|
|
|
|
llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(insn);
|
|
|
|
if (!bitcast) return;
|
|
|
|
|
|
|
|
// This is "safe" because we would have used a ConstantExpr otherwise.
|
|
|
|
insn = cast<llvm::Instruction>(bitcast->getOperand(0));
|
|
|
|
bitcast->eraseFromParent();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Try to emit a fused autorelease of a return result.
|
|
|
|
static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
|
|
|
// We must be immediately followed the cast.
|
|
|
|
llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock();
|
2014-05-21 13:09:00 +08:00
|
|
|
if (BB->empty()) return nullptr;
|
|
|
|
if (&BB->back() != result) return nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *resultType = result->getType();
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// result is in a BasicBlock and is therefore an Instruction.
|
|
|
|
llvm::Instruction *generator = cast<llvm::Instruction>(result);
|
|
|
|
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Instruction*,4> insnsToKill;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// Look for:
|
|
|
|
// %generator = bitcast %type1* %generator2 to %type2*
|
|
|
|
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(generator)) {
|
|
|
|
// We would have emitted this as a constant if the operand weren't
|
|
|
|
// an Instruction.
|
|
|
|
generator = cast<llvm::Instruction>(bitcast->getOperand(0));
|
|
|
|
|
|
|
|
// Require the generator to be immediately followed by the cast.
|
|
|
|
if (generator->getNextNode() != bitcast)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
insnsToKill.push_back(bitcast);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Look for:
|
|
|
|
// %generator = call i8* @objc_retain(i8* %originalResult)
|
|
|
|
// or
|
|
|
|
// %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult)
|
|
|
|
llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator);
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!call) return nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
bool doRetainAutorelease;
|
|
|
|
|
|
|
|
if (call->getCalledValue() == CGF.CGM.getARCEntrypoints().objc_retain) {
|
|
|
|
doRetainAutorelease = true;
|
|
|
|
} else if (call->getCalledValue() == CGF.CGM.getARCEntrypoints()
|
|
|
|
.objc_retainAutoreleasedReturnValue) {
|
|
|
|
doRetainAutorelease = false;
|
|
|
|
|
2012-09-08 07:30:50 +08:00
|
|
|
// If we emitted an assembly marker for this call (and the
|
|
|
|
// ARCEntrypoints field should have been set if so), go looking
|
|
|
|
// for that call. If we can't find it, we can't do this
|
|
|
|
// optimization. But it should always be the immediately previous
|
|
|
|
// instruction, unless we needed bitcasts around the call.
|
|
|
|
if (CGF.CGM.getARCEntrypoints().retainAutoreleasedReturnValueMarker) {
|
|
|
|
llvm::Instruction *prev = call->getPrevNode();
|
|
|
|
assert(prev);
|
|
|
|
if (isa<llvm::BitCastInst>(prev)) {
|
|
|
|
prev = prev->getPrevNode();
|
|
|
|
assert(prev);
|
|
|
|
}
|
|
|
|
assert(isa<llvm::CallInst>(prev));
|
|
|
|
assert(cast<llvm::CallInst>(prev)->getCalledValue() ==
|
|
|
|
CGF.CGM.getARCEntrypoints().retainAutoreleasedReturnValueMarker);
|
|
|
|
insnsToKill.push_back(prev);
|
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
} else {
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
result = call->getArgOperand(0);
|
|
|
|
insnsToKill.push_back(call);
|
|
|
|
|
|
|
|
// Keep killing bitcasts, for sanity. Note that we no longer care
|
|
|
|
// about precise ordering as long as there's exactly one use.
|
|
|
|
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(result)) {
|
|
|
|
if (!bitcast->hasOneUse()) break;
|
|
|
|
insnsToKill.push_back(bitcast);
|
|
|
|
result = bitcast->getOperand(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete all the unnecessary instructions, from latest to earliest.
|
2011-07-23 18:55:15 +08:00
|
|
|
for (SmallVectorImpl<llvm::Instruction*>::iterator
|
2011-06-16 07:02:42 +08:00
|
|
|
i = insnsToKill.begin(), e = insnsToKill.end(); i != e; ++i)
|
|
|
|
(*i)->eraseFromParent();
|
|
|
|
|
|
|
|
// Do the fused retain/autorelease if we were asked to.
|
|
|
|
if (doRetainAutorelease)
|
|
|
|
result = CGF.EmitARCRetainAutoreleaseReturnValue(result);
|
|
|
|
|
|
|
|
// Cast back to the result type.
|
|
|
|
return CGF.Builder.CreateBitCast(result, resultType);
|
|
|
|
}
|
|
|
|
|
2012-01-29 15:46:59 +08:00
|
|
|
/// If this is a +1 of the value of an immutable 'self', remove it.
|
|
|
|
static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
|
|
|
// This is only applicable to a method with an immutable 'self'.
|
2012-07-31 08:33:55 +08:00
|
|
|
const ObjCMethodDecl *method =
|
|
|
|
dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl);
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!method) return nullptr;
|
2012-01-29 15:46:59 +08:00
|
|
|
const VarDecl *self = method->getSelfDecl();
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!self->getType().isConstQualified()) return nullptr;
|
2012-01-29 15:46:59 +08:00
|
|
|
|
|
|
|
// Look for a retain call.
|
|
|
|
llvm::CallInst *retainCall =
|
|
|
|
dyn_cast<llvm::CallInst>(result->stripPointerCasts());
|
|
|
|
if (!retainCall ||
|
|
|
|
retainCall->getCalledValue() != CGF.CGM.getARCEntrypoints().objc_retain)
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2012-01-29 15:46:59 +08:00
|
|
|
|
|
|
|
// Look for an ordinary load of 'self'.
|
|
|
|
llvm::Value *retainedValue = retainCall->getArgOperand(0);
|
|
|
|
llvm::LoadInst *load =
|
|
|
|
dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts());
|
|
|
|
if (!load || load->isAtomic() || load->isVolatile() ||
|
|
|
|
load->getPointerOperand() != CGF.GetAddrOfLocalVar(self))
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2012-01-29 15:46:59 +08:00
|
|
|
|
|
|
|
// Okay! Burn it all down. This relies for correctness on the
|
|
|
|
// assumption that the retain is emitted as part of the return and
|
|
|
|
// that thereafter everything is used "linearly".
|
|
|
|
llvm::Type *resultType = result->getType();
|
|
|
|
eraseUnusedBitCasts(cast<llvm::Instruction>(result));
|
|
|
|
assert(retainCall->use_empty());
|
|
|
|
retainCall->eraseFromParent();
|
|
|
|
eraseUnusedBitCasts(cast<llvm::Instruction>(retainedValue));
|
|
|
|
|
|
|
|
return CGF.Builder.CreateBitCast(load, resultType);
|
|
|
|
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Emit an ARC autorelease of the result of a function.
|
2012-01-29 15:46:59 +08:00
|
|
|
///
|
|
|
|
/// \return the value to actually return from the function
|
2011-06-16 07:02:42 +08:00
|
|
|
static llvm::Value *emitAutoreleaseOfResult(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
2012-01-29 15:46:59 +08:00
|
|
|
// If we're returning 'self', kill the initial retain. This is a
|
|
|
|
// heuristic attempt to "encourage correctness" in the really unfortunate
|
|
|
|
// case where we have a return of self during a dealloc and we desperately
|
|
|
|
// need to avoid the possible autorelease.
|
|
|
|
if (llvm::Value *self = tryRemoveRetainOfSelf(CGF, result))
|
|
|
|
return self;
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// At -O0, try to emit a fused retain/autorelease.
|
|
|
|
if (CGF.shouldUseFusedARCCalls())
|
|
|
|
if (llvm::Value *fused = tryEmitFusedAutoreleaseOfResult(CGF, result))
|
|
|
|
return fused;
|
|
|
|
|
|
|
|
return CGF.EmitARCAutoreleaseReturnValue(result);
|
|
|
|
}
|
|
|
|
|
2012-01-29 10:35:02 +08:00
|
|
|
/// Heuristically search for a dominating store to the return-value slot.
|
|
|
|
static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
|
|
|
|
// If there are multiple uses of the return-value slot, just check
|
|
|
|
// for something immediately preceding the IP. Sometimes this can
|
|
|
|
// happen with how we generate implicit-returns; it can also happen
|
|
|
|
// with noreturn cleanups.
|
|
|
|
if (!CGF.ReturnValue->hasOneUse()) {
|
|
|
|
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
|
2014-05-21 13:09:00 +08:00
|
|
|
if (IP->empty()) return nullptr;
|
2012-01-29 10:35:02 +08:00
|
|
|
llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back());
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!store) return nullptr;
|
|
|
|
if (store->getPointerOperand() != CGF.ReturnValue) return nullptr;
|
2012-01-29 10:35:02 +08:00
|
|
|
assert(!store->isAtomic() && !store->isVolatile()); // see below
|
|
|
|
return store;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::StoreInst *store =
|
2014-03-09 11:16:50 +08:00
|
|
|
dyn_cast<llvm::StoreInst>(CGF.ReturnValue->user_back());
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!store) return nullptr;
|
2012-01-29 10:35:02 +08:00
|
|
|
|
|
|
|
// These aren't actually possible for non-coerced returns, and we
|
|
|
|
// only care about non-coerced returns on this code path.
|
|
|
|
assert(!store->isAtomic() && !store->isVolatile());
|
|
|
|
|
|
|
|
// Now do a first-and-dirty dominance check: just walk up the
|
|
|
|
// single-predecessors chain from the current insertion point.
|
|
|
|
llvm::BasicBlock *StoreBB = store->getParent();
|
|
|
|
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
|
|
|
|
while (IP != StoreBB) {
|
|
|
|
if (!(IP = IP->getSinglePredecessor()))
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2012-01-29 10:35:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Okay, the store's basic block dominates the insertion point; we
|
|
|
|
// can do our thing.
|
|
|
|
return store;
|
|
|
|
}
|
|
|
|
|
2013-05-03 01:30:20 +08:00
|
|
|
void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
|
2013-10-02 10:29:49 +08:00
|
|
|
bool EmitRetDbgLoc,
|
|
|
|
SourceLocation EndLoc) {
|
2014-09-05 06:16:33 +08:00
|
|
|
if (CurCodeDecl && CurCodeDecl->hasAttr<NakedAttr>()) {
|
|
|
|
// Naked functions don't have epilogues.
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-09-10 10:41:04 +08:00
|
|
|
// Functions with no result always return void.
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!ReturnValue) {
|
2010-06-27 07:13:19 +08:00
|
|
|
Builder.CreateRetVoid();
|
|
|
|
return;
|
|
|
|
}
|
2010-07-01 05:27:58 +08:00
|
|
|
|
2010-07-21 04:13:52 +08:00
|
|
|
llvm::DebugLoc RetDbgLoc;
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::Value *RV = nullptr;
|
2010-06-27 07:13:19 +08:00
|
|
|
QualType RetTy = FI.getReturnType();
|
|
|
|
const ABIArgInfo &RetAI = FI.getReturnInfo();
|
2009-06-06 17:36:29 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
switch (RetAI.getKind()) {
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca:
|
2014-02-25 08:59:14 +08:00
|
|
|
// Aggregrates get evaluated directly into the destination. Sometimes we
|
|
|
|
// need to return the sret value in a register, though.
|
|
|
|
assert(hasAggregateEvaluationKind(RetTy));
|
|
|
|
if (RetAI.getInAllocaSRet()) {
|
|
|
|
llvm::Function::arg_iterator EI = CurFn->arg_end();
|
|
|
|
--EI;
|
|
|
|
llvm::Value *ArgStruct = EI;
|
|
|
|
llvm::Value *SRet =
|
|
|
|
Builder.CreateStructGEP(ArgStruct, RetAI.getInAllocaFieldIndex());
|
|
|
|
RV = Builder.CreateLoad(SRet, "sret");
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
break;
|
|
|
|
|
2010-08-21 10:24:36 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
2014-05-10 06:46:15 +08:00
|
|
|
auto AI = CurFn->arg_begin();
|
|
|
|
if (RetAI.isSRetAfterThis())
|
|
|
|
++AI;
|
2013-03-08 05:37:08 +08:00
|
|
|
switch (getEvaluationKind(RetTy)) {
|
|
|
|
case TEK_Complex: {
|
|
|
|
ComplexPairTy RT =
|
2013-10-02 10:29:49 +08:00
|
|
|
EmitLoadOfComplex(MakeNaturalAlignAddrLValue(ReturnValue, RetTy),
|
|
|
|
EndLoc);
|
2014-05-10 06:46:15 +08:00
|
|
|
EmitStoreOfComplex(RT, MakeNaturalAlignAddrLValue(AI, RetTy),
|
2013-03-08 05:37:08 +08:00
|
|
|
/*isInit*/ true);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case TEK_Aggregate:
|
2010-06-27 07:13:19 +08:00
|
|
|
// Do nothing; aggregrates get evaluated directly into the destination.
|
2013-03-08 05:37:08 +08:00
|
|
|
break;
|
|
|
|
case TEK_Scalar:
|
|
|
|
EmitStoreOfScalar(Builder.CreateLoad(ReturnValue),
|
2014-05-10 06:46:15 +08:00
|
|
|
MakeNaturalAlignAddrLValue(AI, RetTy),
|
2013-03-08 05:37:08 +08:00
|
|
|
/*isInit*/ true);
|
|
|
|
break;
|
2010-06-27 07:13:19 +08:00
|
|
|
}
|
|
|
|
break;
|
2010-08-21 10:24:36 +08:00
|
|
|
}
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
case ABIArgInfo::Extend:
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2010-07-30 12:02:24 +08:00
|
|
|
if (RetAI.getCoerceToType() == ConvertType(RetTy) &&
|
|
|
|
RetAI.getDirectOffset() == 0) {
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// The internal return value temp always will have pointer-to-return-type
|
|
|
|
// type, just do a load.
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-01-29 10:35:02 +08:00
|
|
|
// If there is a dominating store to ReturnValue, we can elide
|
|
|
|
// the load, zap the store, and usually zap the alloca.
|
|
|
|
if (llvm::StoreInst *SI = findDominatingStoreToReturnValue(*this)) {
|
2013-05-31 02:12:23 +08:00
|
|
|
// Reuse the debug location from the store unless there is
|
|
|
|
// cleanup code to be emitted between the store and return
|
|
|
|
// instruction.
|
|
|
|
if (EmitRetDbgLoc && !AutoreleaseResult)
|
2013-05-03 01:30:20 +08:00
|
|
|
RetDbgLoc = SI->getDebugLoc();
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// Get the stored value and nuke the now-dead store.
|
|
|
|
RV = SI->getValueOperand();
|
|
|
|
SI->eraseFromParent();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// If that was the only use of the return value, nuke it as well now.
|
|
|
|
if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) {
|
|
|
|
cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent();
|
2014-05-21 13:09:00 +08:00
|
|
|
ReturnValue = nullptr;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
}
|
2012-01-29 10:35:02 +08:00
|
|
|
|
|
|
|
// Otherwise, we have to do a simple load.
|
|
|
|
} else {
|
|
|
|
RV = Builder.CreateLoad(ReturnValue);
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
}
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
} else {
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::Value *V = ReturnValue;
|
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = RetAI.getDirectOffset()) {
|
|
|
|
V = Builder.CreateBitCast(V, Builder.getInt8PtrTy());
|
|
|
|
V = Builder.CreateConstGEP1_32(V, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
V = Builder.CreateBitCast(V,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// In ARC, end functions that return a retainable type with a call
|
|
|
|
// to objc_autoreleaseReturnValue.
|
|
|
|
if (AutoreleaseResult) {
|
2012-03-11 15:00:24 +08:00
|
|
|
assert(getLangOpts().ObjCAutoRefCount &&
|
2011-06-16 07:02:42 +08:00
|
|
|
!FI.isReturnsRetained() &&
|
|
|
|
RetTy->isObjCRetainableType());
|
|
|
|
RV = emitAutoreleaseOfResult(*this, RV);
|
|
|
|
}
|
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
break;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2010-06-27 07:13:19 +08:00
|
|
|
break;
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-08-13 08:26:40 +08:00
|
|
|
llvm::Instruction *Ret;
|
|
|
|
if (RV) {
|
2014-11-08 06:29:38 +08:00
|
|
|
if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) {
|
2014-09-09 04:17:19 +08:00
|
|
|
if (auto RetNNAttr = CurGD.getDecl()->getAttr<ReturnsNonNullAttr>()) {
|
|
|
|
SanitizerScope SanScope(this);
|
|
|
|
llvm::Value *Cond = Builder.CreateICmpNE(
|
|
|
|
RV, llvm::Constant::getNullValue(RV->getType()));
|
|
|
|
llvm::Constant *StaticData[] = {
|
|
|
|
EmitCheckSourceLocation(EndLoc),
|
|
|
|
EmitCheckSourceLocation(RetNNAttr->getLocation()),
|
|
|
|
};
|
2014-11-12 06:03:54 +08:00
|
|
|
EmitCheck(std::make_pair(Cond, SanitizerKind::ReturnsNonnullAttribute),
|
|
|
|
"nonnull_return", StaticData, None);
|
2014-09-09 04:17:19 +08:00
|
|
|
}
|
2014-08-13 08:26:40 +08:00
|
|
|
}
|
|
|
|
Ret = Builder.CreateRet(RV);
|
|
|
|
} else {
|
|
|
|
Ret = Builder.CreateRetVoid();
|
|
|
|
}
|
|
|
|
|
2010-07-22 02:08:50 +08:00
|
|
|
if (!RetDbgLoc.isUnknown())
|
2015-02-07 21:15:54 +08:00
|
|
|
Ret->setDebugLoc(std::move(RetDbgLoc));
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
|
|
|
|
const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
|
|
|
|
return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
|
|
|
|
}
|
|
|
|
|
|
|
|
static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF, QualType Ty) {
|
|
|
|
// FIXME: Generate IR in one pass, rather than going back and fixing up these
|
|
|
|
// placeholders.
|
|
|
|
llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
|
|
|
|
llvm::Value *Placeholder =
|
|
|
|
llvm::UndefValue::get(IRTy->getPointerTo()->getPointerTo());
|
|
|
|
Placeholder = CGF.Builder.CreateLoad(Placeholder);
|
|
|
|
return AggValueSlot::forAddr(Placeholder, CharUnits::Zero(),
|
|
|
|
Ty.getQualifiers(),
|
|
|
|
AggValueSlot::IsNotDestructed,
|
|
|
|
AggValueSlot::DoesNotNeedGCBarriers,
|
|
|
|
AggValueSlot::IsNotAliased);
|
|
|
|
}
|
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
|
2013-10-02 10:29:49 +08:00
|
|
|
const VarDecl *param,
|
|
|
|
SourceLocation loc) {
|
2010-05-27 06:34:26 +08:00
|
|
|
// StartFunction converted the ABI-lowered parameter(s) into a
|
|
|
|
// local alloca. We need to turn that into an r-value suitable
|
|
|
|
// for EmitCall.
|
2011-03-12 04:59:21 +08:00
|
|
|
llvm::Value *local = GetAddrOfLocalVar(param);
|
2010-05-27 06:34:26 +08:00
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
QualType type = param->getType();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-05-27 06:34:26 +08:00
|
|
|
// For the most part, we just need to load the alloca, except:
|
|
|
|
// 1) aggregate r-values are actually pointers to temporaries, and
|
2013-03-08 05:37:08 +08:00
|
|
|
// 2) references to non-scalars are pointers directly to the aggregate.
|
|
|
|
// I don't know why references to scalars are different here.
|
2011-03-12 04:59:21 +08:00
|
|
|
if (const ReferenceType *ref = type->getAs<ReferenceType>()) {
|
2013-03-08 05:37:08 +08:00
|
|
|
if (!hasScalarEvaluationKind(ref->getPointeeType()))
|
2011-03-12 04:59:21 +08:00
|
|
|
return args.add(RValue::getAggregate(local), type);
|
2010-05-27 06:34:26 +08:00
|
|
|
|
|
|
|
// Locals which are references to scalars are represented
|
|
|
|
// with allocas holding the pointer.
|
2011-03-12 04:59:21 +08:00
|
|
|
return args.add(RValue::get(Builder.CreateLoad(local)), type);
|
2010-05-27 06:34:26 +08:00
|
|
|
}
|
|
|
|
|
2014-07-26 09:34:32 +08:00
|
|
|
assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
|
|
|
|
"cannot emit delegate call arguments for inalloca arguments!");
|
2014-02-01 08:04:45 +08:00
|
|
|
|
2013-10-02 10:29:49 +08:00
|
|
|
args.add(convertTempToRValue(local, type, loc), type);
|
2010-05-27 06:34:26 +08:00
|
|
|
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
static bool isProvablyNull(llvm::Value *addr) {
|
|
|
|
return isa<llvm::ConstantPointerNull>(addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isProvablyNonNull(llvm::Value *addr) {
|
|
|
|
return isa<llvm::AllocaInst>(addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit the actual writing-back of a writeback.
|
|
|
|
static void emitWriteback(CodeGenFunction &CGF,
|
|
|
|
const CallArgList::Writeback &writeback) {
|
2013-03-23 10:35:54 +08:00
|
|
|
const LValue &srcLV = writeback.Source;
|
|
|
|
llvm::Value *srcAddr = srcLV.getAddress();
|
2011-06-16 07:02:42 +08:00
|
|
|
assert(!isProvablyNull(srcAddr) &&
|
|
|
|
"shouldn't have writeback for provably null argument");
|
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::BasicBlock *contBB = nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// If the argument wasn't provably non-null, we need to null check
|
|
|
|
// before doing the store.
|
|
|
|
bool provablyNonNull = isProvablyNonNull(srcAddr);
|
|
|
|
if (!provablyNonNull) {
|
|
|
|
llvm::BasicBlock *writebackBB = CGF.createBasicBlock("icr.writeback");
|
|
|
|
contBB = CGF.createBasicBlock("icr.done");
|
|
|
|
|
|
|
|
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
|
|
|
|
CGF.Builder.CreateCondBr(isNull, contBB, writebackBB);
|
|
|
|
CGF.EmitBlock(writebackBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the value to writeback.
|
|
|
|
llvm::Value *value = CGF.Builder.CreateLoad(writeback.Temporary);
|
|
|
|
|
|
|
|
// Cast it back, in case we're writing an id to a Foo* or something.
|
|
|
|
value = CGF.Builder.CreateBitCast(value,
|
|
|
|
cast<llvm::PointerType>(srcAddr->getType())->getElementType(),
|
|
|
|
"icr.writeback-cast");
|
|
|
|
|
|
|
|
// Perform the writeback.
|
2013-03-23 10:35:54 +08:00
|
|
|
|
|
|
|
// If we have a "to use" value, it's something we need to emit a use
|
|
|
|
// of. This has to be carefully threaded in: if it's done after the
|
|
|
|
// release it's potentially undefined behavior (and the optimizer
|
|
|
|
// will ignore it), and if it happens before the retain then the
|
|
|
|
// optimizer could move the release there.
|
|
|
|
if (writeback.ToUse) {
|
|
|
|
assert(srcLV.getObjCLifetime() == Qualifiers::OCL_Strong);
|
|
|
|
|
|
|
|
// Retain the new value. No need to block-copy here: the block's
|
|
|
|
// being passed up the stack.
|
|
|
|
value = CGF.EmitARCRetainNonBlock(value);
|
|
|
|
|
|
|
|
// Emit the intrinsic use here.
|
|
|
|
CGF.EmitARCIntrinsicUse(writeback.ToUse);
|
|
|
|
|
|
|
|
// Load the old value (primitively).
|
2013-10-02 10:29:49 +08:00
|
|
|
llvm::Value *oldValue = CGF.EmitLoadOfScalar(srcLV, SourceLocation());
|
2013-03-23 10:35:54 +08:00
|
|
|
|
|
|
|
// Put the new value in place (primitively).
|
|
|
|
CGF.EmitStoreOfScalar(value, srcLV, /*init*/ false);
|
|
|
|
|
|
|
|
// Release the old value.
|
|
|
|
CGF.EmitARCRelease(oldValue, srcLV.isARCPreciseLifetime());
|
|
|
|
|
|
|
|
// Otherwise, we can just do a normal lvalue store.
|
|
|
|
} else {
|
|
|
|
CGF.EmitStoreThroughLValue(RValue::get(value), srcLV);
|
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// Jump to the continuation block.
|
|
|
|
if (!provablyNonNull)
|
|
|
|
CGF.EmitBlock(contBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void emitWritebacks(CodeGenFunction &CGF,
|
|
|
|
const CallArgList &args) {
|
2014-03-18 01:22:27 +08:00
|
|
|
for (const auto &I : args.writebacks())
|
|
|
|
emitWriteback(CGF, I);
|
2011-06-16 07:02:42 +08:00
|
|
|
}
|
|
|
|
|
2013-06-21 20:45:15 +08:00
|
|
|
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
|
|
|
|
const CallArgList &CallArgs) {
|
2013-12-05 03:23:12 +08:00
|
|
|
assert(CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee());
|
2013-06-21 20:45:15 +08:00
|
|
|
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
|
|
|
|
CallArgs.getCleanupsToDeactivate();
|
|
|
|
// Iterate in reverse to increase the likelihood of popping the cleanup.
|
|
|
|
for (ArrayRef<CallArgList::CallArgCleanup>::reverse_iterator
|
|
|
|
I = Cleanups.rbegin(), E = Cleanups.rend(); I != E; ++I) {
|
|
|
|
CGF.DeactivateCleanupBlock(I->Cleanup, I->IsActiveIP);
|
|
|
|
I->IsActiveIP->eraseFromParent();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-23 10:35:54 +08:00
|
|
|
static const Expr *maybeGetUnaryAddrOfOperand(const Expr *E) {
|
|
|
|
if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens()))
|
|
|
|
if (uop->getOpcode() == UO_AddrOf)
|
|
|
|
return uop->getSubExpr();
|
2014-05-21 13:09:00 +08:00
|
|
|
return nullptr;
|
2013-03-23 10:35:54 +08:00
|
|
|
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Emit an argument that's being passed call-by-writeback. That is,
|
|
|
|
/// we are passing the address of
|
|
|
|
static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
|
|
|
|
const ObjCIndirectCopyRestoreExpr *CRE) {
|
2013-03-23 10:35:54 +08:00
|
|
|
LValue srcLV;
|
|
|
|
|
|
|
|
// Make an optimistic effort to emit the address as an l-value.
|
|
|
|
// This can fail if the the argument expression is more complicated.
|
|
|
|
if (const Expr *lvExpr = maybeGetUnaryAddrOfOperand(CRE->getSubExpr())) {
|
|
|
|
srcLV = CGF.EmitLValue(lvExpr);
|
|
|
|
|
|
|
|
// Otherwise, just emit it as a scalar.
|
|
|
|
} else {
|
|
|
|
llvm::Value *srcAddr = CGF.EmitScalarExpr(CRE->getSubExpr());
|
|
|
|
|
|
|
|
QualType srcAddrType =
|
|
|
|
CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType();
|
|
|
|
srcLV = CGF.MakeNaturalAlignAddrLValue(srcAddr, srcAddrType);
|
|
|
|
}
|
|
|
|
llvm::Value *srcAddr = srcLV.getAddress();
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// The dest and src types don't necessarily match in LLVM terms
|
|
|
|
// because of the crazy ObjC compatibility rules.
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::PointerType *destType =
|
2011-06-16 07:02:42 +08:00
|
|
|
cast<llvm::PointerType>(CGF.ConvertType(CRE->getType()));
|
|
|
|
|
|
|
|
// If the address is a constant null, just pass the appropriate null.
|
|
|
|
if (isProvablyNull(srcAddr)) {
|
|
|
|
args.add(RValue::get(llvm::ConstantPointerNull::get(destType)),
|
|
|
|
CRE->getType());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create the temporary.
|
|
|
|
llvm::Value *temp = CGF.CreateTempAlloca(destType->getElementType(),
|
|
|
|
"icr.temp");
|
2012-11-28 07:02:53 +08:00
|
|
|
// Loading an l-value can introduce a cleanup if the l-value is __weak,
|
|
|
|
// and that cleanup will be conditional if we can't prove that the l-value
|
|
|
|
// isn't null, so we need to register a dominating point so that the cleanups
|
|
|
|
// system will make valid IR.
|
|
|
|
CodeGenFunction::ConditionalEvaluation condEval(CGF);
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// Zero-initialize it if we're not doing a copy-initialization.
|
|
|
|
bool shouldCopy = CRE->shouldCopy();
|
|
|
|
if (!shouldCopy) {
|
|
|
|
llvm::Value *null =
|
|
|
|
llvm::ConstantPointerNull::get(
|
|
|
|
cast<llvm::PointerType>(destType->getElementType()));
|
|
|
|
CGF.Builder.CreateStore(null, temp);
|
|
|
|
}
|
2014-05-21 13:09:00 +08:00
|
|
|
|
|
|
|
llvm::BasicBlock *contBB = nullptr;
|
|
|
|
llvm::BasicBlock *originBB = nullptr;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// If the address is *not* known to be non-null, we need to switch.
|
|
|
|
llvm::Value *finalArgument;
|
|
|
|
|
|
|
|
bool provablyNonNull = isProvablyNonNull(srcAddr);
|
|
|
|
if (provablyNonNull) {
|
|
|
|
finalArgument = temp;
|
|
|
|
} else {
|
|
|
|
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
|
|
|
|
|
|
|
|
finalArgument = CGF.Builder.CreateSelect(isNull,
|
|
|
|
llvm::ConstantPointerNull::get(destType),
|
|
|
|
temp, "icr.argument");
|
|
|
|
|
|
|
|
// If we need to copy, then the load has to be conditional, which
|
|
|
|
// means we need control flow.
|
|
|
|
if (shouldCopy) {
|
2013-03-23 10:35:54 +08:00
|
|
|
originBB = CGF.Builder.GetInsertBlock();
|
2011-06-16 07:02:42 +08:00
|
|
|
contBB = CGF.createBasicBlock("icr.cont");
|
|
|
|
llvm::BasicBlock *copyBB = CGF.createBasicBlock("icr.copy");
|
|
|
|
CGF.Builder.CreateCondBr(isNull, contBB, copyBB);
|
|
|
|
CGF.EmitBlock(copyBB);
|
2012-11-28 07:02:53 +08:00
|
|
|
condEval.begin(CGF);
|
2011-06-16 07:02:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::Value *valueToUse = nullptr;
|
2013-03-23 10:35:54 +08:00
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// Perform a copy if necessary.
|
|
|
|
if (shouldCopy) {
|
2013-10-02 10:29:49 +08:00
|
|
|
RValue srcRV = CGF.EmitLoadOfLValue(srcLV, SourceLocation());
|
2011-06-16 07:02:42 +08:00
|
|
|
assert(srcRV.isScalar());
|
|
|
|
|
|
|
|
llvm::Value *src = srcRV.getScalarVal();
|
|
|
|
src = CGF.Builder.CreateBitCast(src, destType->getElementType(),
|
|
|
|
"icr.cast");
|
|
|
|
|
|
|
|
// Use an ordinary store, not a store-to-lvalue.
|
|
|
|
CGF.Builder.CreateStore(src, temp);
|
2013-03-23 10:35:54 +08:00
|
|
|
|
|
|
|
// If optimization is enabled, and the value was held in a
|
|
|
|
// __strong variable, we need to tell the optimizer that this
|
|
|
|
// value has to stay alive until we're doing the store back.
|
|
|
|
// This is because the temporary is effectively unretained,
|
|
|
|
// and so otherwise we can violate the high-level semantics.
|
|
|
|
if (CGF.CGM.getCodeGenOpts().OptimizationLevel != 0 &&
|
|
|
|
srcLV.getObjCLifetime() == Qualifiers::OCL_Strong) {
|
|
|
|
valueToUse = src;
|
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
}
|
2012-11-28 07:02:53 +08:00
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// Finish the control flow if we needed it.
|
2012-11-28 07:02:53 +08:00
|
|
|
if (shouldCopy && !provablyNonNull) {
|
2013-03-23 10:35:54 +08:00
|
|
|
llvm::BasicBlock *copyBB = CGF.Builder.GetInsertBlock();
|
2011-06-16 07:02:42 +08:00
|
|
|
CGF.EmitBlock(contBB);
|
2013-03-23 10:35:54 +08:00
|
|
|
|
|
|
|
// Make a phi for the value to intrinsically use.
|
|
|
|
if (valueToUse) {
|
|
|
|
llvm::PHINode *phiToUse = CGF.Builder.CreatePHI(valueToUse->getType(), 2,
|
|
|
|
"icr.to-use");
|
|
|
|
phiToUse->addIncoming(valueToUse, copyBB);
|
|
|
|
phiToUse->addIncoming(llvm::UndefValue::get(valueToUse->getType()),
|
|
|
|
originBB);
|
|
|
|
valueToUse = phiToUse;
|
|
|
|
}
|
|
|
|
|
2012-11-28 07:02:53 +08:00
|
|
|
condEval.end(CGF);
|
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
2013-03-23 10:35:54 +08:00
|
|
|
args.addWriteback(srcLV, temp, valueToUse);
|
2011-06-16 07:02:42 +08:00
|
|
|
args.add(RValue::get(finalArgument), CRE->getType());
|
|
|
|
}
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) {
|
|
|
|
assert(!StackBase && !StackCleanup.isValid());
|
|
|
|
|
|
|
|
// Save the stack.
|
|
|
|
llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave);
|
|
|
|
StackBase = CGF.Builder.CreateCall(F, "inalloca.save");
|
|
|
|
|
|
|
|
// Control gets really tied up in landing pads, so we have to spill the
|
|
|
|
// stacksave to an alloca to avoid violating SSA form.
|
|
|
|
// TODO: This is dead if we never emit the cleanup. We should create the
|
|
|
|
// alloca and store lazily on the first cleanup emission.
|
|
|
|
StackBaseMem = CGF.CreateTempAlloca(CGF.Int8PtrTy, "inalloca.spmem");
|
|
|
|
CGF.Builder.CreateStore(StackBase, StackBaseMem);
|
|
|
|
CGF.pushStackRestore(EHCleanup, StackBaseMem);
|
|
|
|
StackCleanup = CGF.EHStack.getInnermostEHScope();
|
|
|
|
assert(StackCleanup.isValid());
|
|
|
|
}
|
|
|
|
|
|
|
|
void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
|
|
|
|
if (StackBase) {
|
|
|
|
CGF.DeactivateCleanupBlock(StackCleanup, StackBase);
|
|
|
|
llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
|
|
|
|
// We could load StackBase from StackBaseMem, but in the non-exceptional
|
|
|
|
// case we can skip it.
|
|
|
|
CGF.Builder.CreateCall(F, StackBase);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-09 01:22:45 +08:00
|
|
|
static void emitNonNullArgCheck(CodeGenFunction &CGF, RValue RV,
|
|
|
|
QualType ArgType, SourceLocation ArgLoc,
|
|
|
|
const FunctionDecl *FD, unsigned ParmNum) {
|
2014-11-08 06:29:38 +08:00
|
|
|
if (!CGF.SanOpts.has(SanitizerKind::NonnullAttribute) || !FD)
|
2014-09-09 01:22:45 +08:00
|
|
|
return;
|
|
|
|
auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr;
|
|
|
|
unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum;
|
|
|
|
auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo);
|
|
|
|
if (!NNAttr)
|
|
|
|
return;
|
|
|
|
CodeGenFunction::SanitizerScope SanScope(&CGF);
|
|
|
|
assert(RV.isScalar());
|
|
|
|
llvm::Value *V = RV.getScalarVal();
|
|
|
|
llvm::Value *Cond =
|
|
|
|
CGF.Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType()));
|
|
|
|
llvm::Constant *StaticData[] = {
|
|
|
|
CGF.EmitCheckSourceLocation(ArgLoc),
|
|
|
|
CGF.EmitCheckSourceLocation(NNAttr->getLocation()),
|
|
|
|
llvm::ConstantInt::get(CGF.Int32Ty, ArgNo + 1),
|
|
|
|
};
|
2014-11-12 06:03:54 +08:00
|
|
|
CGF.EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute),
|
|
|
|
"nonnull_arg", StaticData, None);
|
2014-09-09 01:22:45 +08:00
|
|
|
}
|
|
|
|
|
2013-12-05 03:23:12 +08:00
|
|
|
void CodeGenFunction::EmitCallArgs(CallArgList &Args,
|
|
|
|
ArrayRef<QualType> ArgTypes,
|
|
|
|
CallExpr::const_arg_iterator ArgBeg,
|
|
|
|
CallExpr::const_arg_iterator ArgEnd,
|
2014-09-09 01:22:45 +08:00
|
|
|
const FunctionDecl *CalleeDecl,
|
2015-01-22 07:08:17 +08:00
|
|
|
unsigned ParamsToSkip) {
|
2013-12-05 03:23:12 +08:00
|
|
|
// We *have* to evaluate arguments from right to left in the MS C++ ABI,
|
|
|
|
// because arguments are destroyed left to right in the callee.
|
|
|
|
if (CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
|
2014-02-01 08:04:45 +08:00
|
|
|
// Insert a stack save if we're going to need any inalloca args.
|
|
|
|
bool HasInAllocaArgs = false;
|
|
|
|
for (ArrayRef<QualType>::iterator I = ArgTypes.begin(), E = ArgTypes.end();
|
|
|
|
I != E && !HasInAllocaArgs; ++I)
|
|
|
|
HasInAllocaArgs = isInAllocaArgument(CGM.getCXXABI(), *I);
|
|
|
|
if (HasInAllocaArgs) {
|
|
|
|
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
|
|
|
|
Args.allocateArgumentMemory(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Evaluate each argument.
|
2013-12-05 03:23:12 +08:00
|
|
|
size_t CallArgsStart = Args.size();
|
|
|
|
for (int I = ArgTypes.size() - 1; I >= 0; --I) {
|
|
|
|
CallExpr::const_arg_iterator Arg = ArgBeg + I;
|
|
|
|
EmitCallArg(Args, *Arg, ArgTypes[I]);
|
2014-09-09 01:22:45 +08:00
|
|
|
emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
|
|
|
|
CalleeDecl, ParamsToSkip + I);
|
2013-12-05 03:23:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Un-reverse the arguments we just evaluated so they match up with the LLVM
|
|
|
|
// IR function.
|
|
|
|
std::reverse(Args.begin() + CallArgsStart, Args.end());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
|
|
|
|
CallExpr::const_arg_iterator Arg = ArgBeg + I;
|
|
|
|
assert(Arg != ArgEnd);
|
|
|
|
EmitCallArg(Args, *Arg, ArgTypes[I]);
|
2014-09-09 01:22:45 +08:00
|
|
|
emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
|
|
|
|
CalleeDecl, ParamsToSkip + I);
|
2013-12-05 03:23:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
struct DestroyUnpassedArg : EHScopeStack::Cleanup {
|
|
|
|
DestroyUnpassedArg(llvm::Value *Addr, QualType Ty)
|
|
|
|
: Addr(Addr), Ty(Ty) {}
|
|
|
|
|
|
|
|
llvm::Value *Addr;
|
|
|
|
QualType Ty;
|
|
|
|
|
2014-03-12 14:41:41 +08:00
|
|
|
void Emit(CodeGenFunction &CGF, Flags flags) override {
|
2014-02-01 08:04:45 +08:00
|
|
|
const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
|
|
|
|
assert(!Dtor->isTrivial());
|
|
|
|
CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
|
|
|
|
/*Delegating=*/false, Addr);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-02-10 03:13:51 +08:00
|
|
|
struct DisableDebugLocationUpdates {
|
|
|
|
CodeGenFunction &CGF;
|
|
|
|
bool disabledDebugInfo;
|
|
|
|
DisableDebugLocationUpdates(CodeGenFunction &CGF, const Expr *E) : CGF(CGF) {
|
|
|
|
if ((disabledDebugInfo = isa<CXXDefaultArgExpr>(E) && CGF.getDebugInfo()))
|
|
|
|
CGF.disableDebugInfo();
|
|
|
|
}
|
|
|
|
~DisableDebugLocationUpdates() {
|
|
|
|
if (disabledDebugInfo)
|
|
|
|
CGF.enableDebugInfo();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
|
|
|
|
QualType type) {
|
2015-02-10 03:13:51 +08:00
|
|
|
DisableDebugLocationUpdates Dis(*this, E);
|
2011-06-16 07:02:42 +08:00
|
|
|
if (const ObjCIndirectCopyRestoreExpr *CRE
|
|
|
|
= dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
|
2012-11-02 06:30:59 +08:00
|
|
|
assert(getLangOpts().ObjCAutoRefCount);
|
2011-06-16 07:02:42 +08:00
|
|
|
assert(getContext().hasSameType(E->getType(), type));
|
|
|
|
return emitWritebackArg(*this, args, CRE);
|
|
|
|
}
|
|
|
|
|
2011-08-27 02:42:59 +08:00
|
|
|
assert(type->isReferenceType() == E->isGLValue() &&
|
|
|
|
"reference binding to unmaterialized r-value!");
|
|
|
|
|
2011-08-27 05:08:13 +08:00
|
|
|
if (E->isGLValue()) {
|
|
|
|
assert(E->getObjectKind() == OK_Ordinary);
|
2013-06-13 07:38:09 +08:00
|
|
|
return args.add(EmitReferenceBindingToExpr(E), type);
|
2011-08-27 05:08:13 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2013-06-21 20:45:15 +08:00
|
|
|
bool HasAggregateEvalKind = hasAggregateEvaluationKind(type);
|
|
|
|
|
|
|
|
// In the Microsoft C++ ABI, aggregate arguments are destructed by the callee.
|
|
|
|
// However, we still have to push an EH-only cleanup in case we unwind before
|
|
|
|
// we make it to the call.
|
2014-05-01 11:07:18 +08:00
|
|
|
if (HasAggregateEvalKind &&
|
|
|
|
CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
|
|
|
|
// If we're using inalloca, use the argument memory. Otherwise, use a
|
2014-05-03 08:33:28 +08:00
|
|
|
// temporary.
|
2014-05-01 11:07:18 +08:00
|
|
|
AggValueSlot Slot;
|
|
|
|
if (args.isUsingInAlloca())
|
|
|
|
Slot = createPlaceholderSlot(*this, type);
|
|
|
|
else
|
|
|
|
Slot = CreateAggTemp(type, "agg.tmp");
|
2014-05-03 08:33:28 +08:00
|
|
|
|
|
|
|
const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
|
|
|
|
bool DestroyedInCallee =
|
|
|
|
RD && RD->hasNonTrivialDestructor() &&
|
|
|
|
CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
|
|
|
|
if (DestroyedInCallee)
|
|
|
|
Slot.setExternallyDestructed();
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
EmitAggExpr(E, Slot);
|
|
|
|
RValue RV = Slot.asRValue();
|
|
|
|
args.add(RV, type);
|
|
|
|
|
2014-05-03 08:33:28 +08:00
|
|
|
if (DestroyedInCallee) {
|
2014-02-01 08:04:45 +08:00
|
|
|
// Create a no-op GEP between the placeholder and the cleanup so we can
|
|
|
|
// RAUW it successfully. It also serves as a marker of the first
|
|
|
|
// instruction where the cleanup is active.
|
|
|
|
pushFullExprCleanup<DestroyUnpassedArg>(EHCleanup, Slot.getAddr(), type);
|
2013-06-21 20:45:15 +08:00
|
|
|
// This unreachable is a temporary marker which will be removed later.
|
|
|
|
llvm::Instruction *IsActive = Builder.CreateUnreachable();
|
|
|
|
args.addArgCleanupDeactivation(EHStack.getInnermostEHScope(), IsActive);
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
return;
|
2013-06-21 20:45:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (HasAggregateEvalKind && isa<ImplicitCastExpr>(E) &&
|
2011-05-26 08:10:27 +08:00
|
|
|
cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
|
|
|
|
LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
|
|
|
|
assert(L.isSimple());
|
2013-06-11 09:08:22 +08:00
|
|
|
if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) {
|
|
|
|
args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
|
|
|
|
} else {
|
|
|
|
// We can't represent a misaligned lvalue in the CallArgList, so copy
|
|
|
|
// to an aligned temporary now.
|
|
|
|
llvm::Value *tmp = CreateMemTemp(type);
|
|
|
|
EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile(),
|
|
|
|
L.getAlignment());
|
|
|
|
args.add(RValue::getAggregate(tmp), type);
|
|
|
|
}
|
2011-05-26 08:10:27 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
args.add(EmitAnyExprToTemp(E), type);
|
2009-04-09 04:47:54 +08:00
|
|
|
}
|
|
|
|
|
2014-10-10 08:05:45 +08:00
|
|
|
QualType CodeGenFunction::getVarArgType(const Expr *Arg) {
|
|
|
|
// System headers on Windows define NULL to 0 instead of 0LL on Win64. MSVC
|
|
|
|
// implicitly widens null pointer constants that are arguments to varargs
|
|
|
|
// functions to pointer-sized ints.
|
|
|
|
if (!getTarget().getTriple().isOSWindows())
|
|
|
|
return Arg->getType();
|
|
|
|
|
|
|
|
if (Arg->getType()->isIntegerType() &&
|
|
|
|
getContext().getTypeSize(Arg->getType()) <
|
|
|
|
getContext().getTargetInfo().getPointerWidth(0) &&
|
|
|
|
Arg->isNullPointerConstant(getContext(),
|
|
|
|
Expr::NPC_ValueDependentIsNotNull)) {
|
|
|
|
return getContext().getIntPtrType();
|
|
|
|
}
|
|
|
|
|
|
|
|
return Arg->getType();
|
|
|
|
}
|
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
|
|
|
void
|
|
|
|
CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) {
|
|
|
|
if (CGM.getCodeGenOpts().OptimizationLevel != 0 &&
|
|
|
|
!CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
|
|
|
|
Inst->setMetadata("clang.arc.no_objc_arc_exceptions",
|
|
|
|
CGM.getNoObjCARCExceptionsMetadata());
|
|
|
|
}
|
|
|
|
|
2013-03-01 03:01:20 +08:00
|
|
|
/// Emits a call to the given no-arguments nounwind runtime function.
|
|
|
|
llvm::CallInst *
|
|
|
|
CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee,
|
|
|
|
const llvm::Twine &name) {
|
2014-08-27 14:28:36 +08:00
|
|
|
return EmitNounwindRuntimeCall(callee, None, name);
|
2013-03-01 03:01:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a call to the given nounwind runtime function.
|
|
|
|
llvm::CallInst *
|
|
|
|
CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee,
|
|
|
|
ArrayRef<llvm::Value*> args,
|
|
|
|
const llvm::Twine &name) {
|
|
|
|
llvm::CallInst *call = EmitRuntimeCall(callee, args, name);
|
|
|
|
call->setDoesNotThrow();
|
|
|
|
return call;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a simple call (never an invoke) to the given no-arguments
|
|
|
|
/// runtime function.
|
|
|
|
llvm::CallInst *
|
|
|
|
CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
|
|
|
|
const llvm::Twine &name) {
|
2014-08-27 14:28:36 +08:00
|
|
|
return EmitRuntimeCall(callee, None, name);
|
2013-03-01 03:01:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a simple call (never an invoke) to the given runtime
|
|
|
|
/// function.
|
|
|
|
llvm::CallInst *
|
|
|
|
CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
|
|
|
|
ArrayRef<llvm::Value*> args,
|
|
|
|
const llvm::Twine &name) {
|
|
|
|
llvm::CallInst *call = Builder.CreateCall(callee, args, name);
|
|
|
|
call->setCallingConv(getRuntimeCC());
|
|
|
|
return call;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a call or invoke to the given noreturn runtime function.
|
|
|
|
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
|
|
|
|
ArrayRef<llvm::Value*> args) {
|
|
|
|
if (getInvokeDest()) {
|
|
|
|
llvm::InvokeInst *invoke =
|
|
|
|
Builder.CreateInvoke(callee,
|
|
|
|
getUnreachableBlock(),
|
|
|
|
getInvokeDest(),
|
|
|
|
args);
|
|
|
|
invoke->setDoesNotReturn();
|
|
|
|
invoke->setCallingConv(getRuntimeCC());
|
|
|
|
} else {
|
|
|
|
llvm::CallInst *call = Builder.CreateCall(callee, args);
|
|
|
|
call->setDoesNotReturn();
|
|
|
|
call->setCallingConv(getRuntimeCC());
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
}
|
2014-01-14 05:24:18 +08:00
|
|
|
PGO.setCurrentRegionUnreachable();
|
2013-03-01 03:01:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a call or invoke instruction to the given nullary runtime
|
|
|
|
/// function.
|
|
|
|
llvm::CallSite
|
|
|
|
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee,
|
|
|
|
const Twine &name) {
|
2014-08-27 14:28:36 +08:00
|
|
|
return EmitRuntimeCallOrInvoke(callee, None, name);
|
2013-03-01 03:01:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emits a call or invoke instruction to the given runtime function.
|
|
|
|
llvm::CallSite
|
|
|
|
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee,
|
|
|
|
ArrayRef<llvm::Value*> args,
|
|
|
|
const Twine &name) {
|
|
|
|
llvm::CallSite callSite = EmitCallOrInvoke(callee, args, name);
|
|
|
|
callSite.setCallingConv(getRuntimeCC());
|
|
|
|
return callSite;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::CallSite
|
|
|
|
CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
|
|
|
|
const Twine &Name) {
|
2014-08-27 14:28:36 +08:00
|
|
|
return EmitCallOrInvoke(Callee, None, Name);
|
2013-03-01 03:01:20 +08:00
|
|
|
}
|
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
/// Emits a call or invoke instruction to the given function, depending
|
|
|
|
/// on the current state of the EH stack.
|
|
|
|
llvm::CallSite
|
|
|
|
CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
|
2011-07-24 01:14:25 +08:00
|
|
|
ArrayRef<llvm::Value *> Args,
|
2011-07-23 18:55:15 +08:00
|
|
|
const Twine &Name) {
|
2010-07-06 09:34:17 +08:00
|
|
|
llvm::BasicBlock *InvokeDest = getInvokeDest();
|
2012-02-16 08:57:37 +08:00
|
|
|
|
|
|
|
llvm::Instruction *Inst;
|
2010-07-06 09:34:17 +08:00
|
|
|
if (!InvokeDest)
|
2012-02-16 08:57:37 +08:00
|
|
|
Inst = Builder.CreateCall(Callee, Args, Name);
|
|
|
|
else {
|
|
|
|
llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont");
|
|
|
|
Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name);
|
|
|
|
EmitBlock(ContBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
2012-03-11 15:00:24 +08:00
|
|
|
if (CGM.getLangOpts().ObjCAutoRefCount)
|
2012-02-16 08:57:37 +08:00
|
|
|
AddObjCARCExceptionMetadata(Inst);
|
2010-07-06 09:34:17 +08:00
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
return Inst;
|
2010-07-06 09:34:17 +08:00
|
|
|
}
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
/// \brief Store a non-aggregate value to an address to initialize it. For
|
|
|
|
/// initialization, a non-atomic store will be used.
|
|
|
|
static void EmitInitStoreOfNonAggregate(CodeGenFunction &CGF, RValue Src,
|
|
|
|
LValue Dst) {
|
|
|
|
if (Src.isScalar())
|
|
|
|
CGF.EmitStoreOfScalar(Src.getScalarVal(), Dst, /*init=*/true);
|
|
|
|
else
|
|
|
|
CGF.EmitStoreOfComplex(Src.getComplexVal(), Dst, /*init=*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
|
|
|
|
llvm::Value *New) {
|
|
|
|
DeferredReplacements.push_back(std::make_pair(Old, New));
|
|
|
|
}
|
2011-07-12 14:29:11 +08:00
|
|
|
|
2009-02-03 06:03:45 +08:00
|
|
|
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
2009-09-09 23:08:12 +08:00
|
|
|
llvm::Value *Callee,
|
2009-12-25 03:25:24 +08:00
|
|
|
ReturnValueSlot ReturnValue,
|
2009-02-21 02:06:48 +08:00
|
|
|
const CallArgList &CallArgs,
|
2010-05-01 19:15:56 +08:00
|
|
|
const Decl *TargetDecl,
|
2010-05-02 21:41:58 +08:00
|
|
|
llvm::Instruction **callOrInvoke) {
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: We no longer need the types from CallArgs; lift up and simplify.
|
2008-09-10 07:27:19 +08:00
|
|
|
|
|
|
|
// Handle struct-return functions by passing a pointer to the
|
|
|
|
// location that we would like to return into.
|
2009-02-03 05:43:58 +08:00
|
|
|
QualType RetTy = CallInfo.getReturnType();
|
2009-02-03 13:59:18 +08:00
|
|
|
const ABIArgInfo &RetAI = CallInfo.getReturnInfo();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-07-12 12:46:18 +08:00
|
|
|
llvm::FunctionType *IRFuncTy =
|
|
|
|
cast<llvm::FunctionType>(
|
|
|
|
cast<llvm::PointerType>(Callee->getType())->getElementType());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
// If we're using inalloca, insert the allocation after the stack save.
|
|
|
|
// FIXME: Do this earlier rather than hacking it in here!
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::Value *ArgMemory = nullptr;
|
2014-02-01 08:04:45 +08:00
|
|
|
if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
|
2014-04-10 09:40:15 +08:00
|
|
|
llvm::Instruction *IP = CallArgs.getStackBase();
|
|
|
|
llvm::AllocaInst *AI;
|
|
|
|
if (IP) {
|
|
|
|
IP = IP->getNextNode();
|
|
|
|
AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
|
|
|
|
} else {
|
2014-05-16 07:01:46 +08:00
|
|
|
AI = CreateTempAlloca(ArgStruct, "argmem");
|
2014-04-10 09:40:15 +08:00
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
AI->setUsedWithInAlloca(true);
|
|
|
|
assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
|
|
|
|
ArgMemory = AI;
|
|
|
|
}
|
|
|
|
|
2014-09-30 06:08:00 +08:00
|
|
|
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs());
|
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// If the call returns a temporary with struct return, create a temporary
|
2009-12-25 04:40:36 +08:00
|
|
|
// alloca to hold the result, unless one is given to us.
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::Value *SRetPtr = nullptr;
|
2014-05-10 06:46:15 +08:00
|
|
|
if (RetAI.isIndirect() || RetAI.isInAlloca()) {
|
2014-02-01 08:04:45 +08:00
|
|
|
SRetPtr = ReturnValue.getValue();
|
|
|
|
if (!SRetPtr)
|
|
|
|
SRetPtr = CreateMemTemp(RetTy);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
if (IRFunctionArgs.hasSRetArg()) {
|
|
|
|
IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr;
|
2014-02-01 08:04:45 +08:00
|
|
|
} else {
|
|
|
|
llvm::Value *Addr =
|
|
|
|
Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
|
|
|
|
Builder.CreateStore(SRetPtr, Addr);
|
|
|
|
}
|
2009-12-25 04:40:36 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-02-05 05:17:21 +08:00
|
|
|
assert(CallInfo.arg_size() == CallArgs.size() &&
|
|
|
|
"Mismatch between function signature & arguments.");
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
unsigned ArgNo = 0;
|
2009-02-03 13:59:18 +08:00
|
|
|
CGFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin();
|
2009-09-09 23:08:12 +08:00
|
|
|
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
I != E; ++I, ++info_it, ++ArgNo) {
|
2009-02-03 13:59:18 +08:00
|
|
|
const ABIArgInfo &ArgInfo = info_it->info;
|
2011-05-03 02:05:27 +08:00
|
|
|
RValue RV = I->RV;
|
2008-09-17 08:51:38 +08:00
|
|
|
|
2013-03-08 05:37:08 +08:00
|
|
|
CharUnits TypeAlign = getContext().getTypeAlignInChars(I->Ty);
|
2012-10-24 09:59:00 +08:00
|
|
|
|
|
|
|
// Insert a padding argument to ensure proper alignment.
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
if (IRFunctionArgs.hasPaddingArg(ArgNo))
|
|
|
|
IRCallArgs[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
|
|
|
|
llvm::UndefValue::get(ArgInfo.getPaddingType());
|
|
|
|
|
|
|
|
unsigned FirstIRArg, NumIRArgs;
|
|
|
|
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
|
2012-10-24 09:59:00 +08:00
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
switch (ArgInfo.getKind()) {
|
2014-02-01 08:04:45 +08:00
|
|
|
case ABIArgInfo::InAlloca: {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 0);
|
2014-02-01 08:04:45 +08:00
|
|
|
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
|
|
|
|
if (RV.isAggregate()) {
|
|
|
|
// Replace the placeholder with the appropriate argument slot GEP.
|
|
|
|
llvm::Instruction *Placeholder =
|
|
|
|
cast<llvm::Instruction>(RV.getAggregateAddr());
|
|
|
|
CGBuilderTy::InsertPoint IP = Builder.saveIP();
|
|
|
|
Builder.SetInsertPoint(Placeholder);
|
|
|
|
llvm::Value *Addr = Builder.CreateStructGEP(
|
|
|
|
ArgMemory, ArgInfo.getInAllocaFieldIndex());
|
|
|
|
Builder.restoreIP(IP);
|
|
|
|
deferPlaceholderReplacement(Placeholder, Addr);
|
|
|
|
} else {
|
|
|
|
// Store the RValue into the argument struct.
|
|
|
|
llvm::Value *Addr =
|
|
|
|
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
|
2014-04-01 00:12:47 +08:00
|
|
|
unsigned AS = Addr->getType()->getPointerAddressSpace();
|
|
|
|
llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
|
|
|
|
// There are some cases where a trivial bitcast is not avoidable. The
|
|
|
|
// definition of a type later in a translation unit may change it's type
|
|
|
|
// from {}* to (%struct.foo*)*.
|
|
|
|
if (Addr->getType() != MemType)
|
|
|
|
Addr = Builder.CreateBitCast(Addr, MemType);
|
2014-02-01 08:04:45 +08:00
|
|
|
LValue argLV = MakeAddrLValue(Addr, I->Ty, TypeAlign);
|
|
|
|
EmitInitStoreOfNonAggregate(*this, RV, argLV);
|
|
|
|
}
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
break;
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
|
|
|
|
2010-08-21 10:24:36 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
2009-02-05 17:16:39 +08:00
|
|
|
if (RV.isScalar() || RV.isComplex()) {
|
|
|
|
// Make a temporary alloca to pass the argument.
|
2011-06-16 02:26:32 +08:00
|
|
|
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
|
|
|
|
if (ArgInfo.getIndirectAlign() > AI->getAlignment())
|
|
|
|
AI->setAlignment(ArgInfo.getIndirectAlign());
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
IRCallArgs[FirstIRArg] = AI;
|
2013-03-08 05:37:08 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
LValue argLV = MakeAddrLValue(AI, I->Ty, TypeAlign);
|
2014-02-01 08:04:45 +08:00
|
|
|
EmitInitStoreOfNonAggregate(*this, RV, argLV);
|
2009-02-05 17:16:39 +08:00
|
|
|
} else {
|
2011-06-14 09:37:52 +08:00
|
|
|
// We want to avoid creating an unnecessary temporary+copy here;
|
2013-03-10 20:59:00 +08:00
|
|
|
// however, we need one in three cases:
|
2011-06-14 09:37:52 +08:00
|
|
|
// 1. If the argument is not byval, and we are required to copy the
|
|
|
|
// source. (This case doesn't occur on any common architecture.)
|
|
|
|
// 2. If the argument is byval, RV is not sufficiently aligned, and
|
|
|
|
// we cannot force it to be sufficiently aligned.
|
2013-03-10 20:59:00 +08:00
|
|
|
// 3. If the argument is byval, but RV is located in an address space
|
|
|
|
// different than that of the argument (0).
|
2011-06-16 06:09:18 +08:00
|
|
|
llvm::Value *Addr = RV.getAggregateAddr();
|
|
|
|
unsigned Align = ArgInfo.getIndirectAlign();
|
2012-10-09 00:25:52 +08:00
|
|
|
const llvm::DataLayout *TD = &CGM.getDataLayout();
|
2013-03-10 20:59:00 +08:00
|
|
|
const unsigned RVAddrSpace = Addr->getType()->getPointerAddressSpace();
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
const unsigned ArgAddrSpace =
|
|
|
|
(FirstIRArg < IRFuncTy->getNumParams()
|
|
|
|
? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
|
|
|
|
: 0);
|
2011-06-16 06:09:18 +08:00
|
|
|
if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
|
2013-03-08 05:37:08 +08:00
|
|
|
(ArgInfo.getIndirectByVal() && TypeAlign.getQuantity() < Align &&
|
2013-03-10 20:59:00 +08:00
|
|
|
llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align) ||
|
|
|
|
(ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
|
2011-06-14 09:37:52 +08:00
|
|
|
// Create an aligned temporary, and copy to it.
|
2011-06-16 06:09:18 +08:00
|
|
|
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
|
|
|
|
if (Align > AI->getAlignment())
|
|
|
|
AI->setAlignment(Align);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
IRCallArgs[FirstIRArg] = AI;
|
2012-03-30 01:37:10 +08:00
|
|
|
EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
|
2011-06-14 09:37:52 +08:00
|
|
|
} else {
|
|
|
|
// Skip the extra memcpy call.
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
IRCallArgs[FirstIRArg] = Addr;
|
2011-06-14 09:37:52 +08:00
|
|
|
}
|
2009-02-05 17:16:39 +08:00
|
|
|
}
|
|
|
|
break;
|
2010-08-21 10:24:36 +08:00
|
|
|
}
|
2009-02-05 17:16:39 +08:00
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 0);
|
2009-01-27 05:26:08 +08:00
|
|
|
break;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Extend:
|
|
|
|
case ABIArgInfo::Direct: {
|
|
|
|
if (!isa<llvm::StructType>(ArgInfo.getCoerceToType()) &&
|
2010-07-30 12:02:24 +08:00
|
|
|
ArgInfo.getCoerceToType() == ConvertType(info_it->type) &&
|
|
|
|
ArgInfo.getDirectOffset() == 0) {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
2011-07-12 12:46:18 +08:00
|
|
|
llvm::Value *V;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (RV.isScalar())
|
2011-07-12 12:46:18 +08:00
|
|
|
V = RV.getScalarVal();
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
else
|
2011-07-12 12:46:18 +08:00
|
|
|
V = Builder.CreateLoad(RV.getAggregateAddr());
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
|
2014-10-10 08:05:45 +08:00
|
|
|
// We might have to widen integers, but we should never truncate.
|
|
|
|
if (ArgInfo.getCoerceToType() != V->getType() &&
|
|
|
|
V->getType()->isIntegerTy())
|
|
|
|
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
|
|
|
|
|
2011-07-12 12:53:39 +08:00
|
|
|
// If the argument doesn't match, perform a bitcast to coerce it. This
|
|
|
|
// can happen due to trivial type mismatches.
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
if (FirstIRArg < IRFuncTy->getNumParams() &&
|
|
|
|
V->getType() != IRFuncTy->getParamType(FirstIRArg))
|
|
|
|
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
|
|
|
|
IRCallArgs[FirstIRArg] = V;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-01-27 05:26:08 +08:00
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
// FIXME: Avoid the conversion through memory if possible.
|
|
|
|
llvm::Value *SrcPtr;
|
2013-03-08 05:37:08 +08:00
|
|
|
if (RV.isScalar() || RV.isComplex()) {
|
2011-05-03 02:05:27 +08:00
|
|
|
SrcPtr = CreateMemTemp(I->Ty, "coerce");
|
2013-03-08 05:37:08 +08:00
|
|
|
LValue SrcLV = MakeAddrLValue(SrcPtr, I->Ty, TypeAlign);
|
2014-02-01 08:04:45 +08:00
|
|
|
EmitInitStoreOfNonAggregate(*this, RV, SrcLV);
|
2009-09-09 23:08:12 +08:00
|
|
|
} else
|
2009-02-04 03:12:28 +08:00
|
|
|
SrcPtr = RV.getAggregateAddr();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = ArgInfo.getDirectOffset()) {
|
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr, Builder.getInt8PtrTy());
|
|
|
|
SrcPtr = Builder.CreateConstGEP1_32(SrcPtr, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(ArgInfo.getCoerceToType()));
|
|
|
|
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-08-27 18:43:15 +08:00
|
|
|
// Fast-isel and the optimizer generally like scalar values better than
|
|
|
|
// FCAs, so we flatten them if this is safe to do for this argument.
|
2014-05-10 00:21:39 +08:00
|
|
|
llvm::StructType *STy =
|
|
|
|
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
|
2014-08-27 18:43:15 +08:00
|
|
|
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
|
2012-10-10 19:29:08 +08:00
|
|
|
llvm::Type *SrcTy =
|
|
|
|
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
|
|
|
uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
|
|
|
|
uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
|
|
|
|
|
|
|
|
// If the source type is smaller than the destination type of the
|
|
|
|
// coerce-to logic, copy the source value into a temp alloca the size
|
|
|
|
// of the destination type to allow loading all of it. The bits past
|
|
|
|
// the source value are left undef.
|
|
|
|
if (SrcSize < DstSize) {
|
|
|
|
llvm::AllocaInst *TempAlloca
|
|
|
|
= CreateTempAlloca(STy, SrcPtr->getName() + ".coerce");
|
|
|
|
Builder.CreateMemCpy(TempAlloca, SrcPtr, SrcSize, 0);
|
|
|
|
SrcPtr = TempAlloca;
|
|
|
|
} else {
|
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr,
|
|
|
|
llvm::PointerType::getUnqual(STy));
|
|
|
|
}
|
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == STy->getNumElements());
|
2010-07-06 04:41:41 +08:00
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i);
|
2010-07-29 02:24:28 +08:00
|
|
|
llvm::LoadInst *LI = Builder.CreateLoad(EltPtr);
|
|
|
|
// We don't know what we're loading from.
|
|
|
|
LI->setAlignment(1);
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
IRCallArgs[FirstIRArg + i] = LI;
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
}
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
} else {
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
// In the simple case, just pass the coerced loaded value.
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(NumIRArgs == 1);
|
|
|
|
IRCallArgs[FirstIRArg] =
|
|
|
|
CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
case ABIArgInfo::Expand:
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
unsigned IRArgPos = FirstIRArg;
|
|
|
|
ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos);
|
|
|
|
assert(IRArgPos == FirstIRArg + NumIRArgs);
|
2008-09-17 08:51:38 +08:00
|
|
|
break;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
if (ArgMemory) {
|
|
|
|
llvm::Value *Arg = ArgMemory;
|
2014-07-08 10:24:27 +08:00
|
|
|
if (CallInfo.isVariadic()) {
|
|
|
|
// When passing non-POD arguments by value to variadic functions, we will
|
|
|
|
// end up with a variadic prototype and an inalloca call site. In such
|
|
|
|
// cases, we can't do any parameter mismatch checks. Give up and bitcast
|
|
|
|
// the callee.
|
|
|
|
unsigned CalleeAS =
|
|
|
|
cast<llvm::PointerType>(Callee->getType())->getAddressSpace();
|
|
|
|
Callee = Builder.CreateBitCast(
|
|
|
|
Callee, getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS));
|
|
|
|
} else {
|
|
|
|
llvm::Type *LastParamTy =
|
|
|
|
IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1);
|
|
|
|
if (Arg->getType() != LastParamTy) {
|
2014-02-01 08:04:45 +08:00
|
|
|
#ifndef NDEBUG
|
2014-07-08 10:24:27 +08:00
|
|
|
// Assert that these structs have equivalent element types.
|
|
|
|
llvm::StructType *FullTy = CallInfo.getArgStruct();
|
|
|
|
llvm::StructType *DeclaredTy = cast<llvm::StructType>(
|
|
|
|
cast<llvm::PointerType>(LastParamTy)->getElementType());
|
|
|
|
assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
|
|
|
|
for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(),
|
|
|
|
DE = DeclaredTy->element_end(),
|
|
|
|
FI = FullTy->element_begin();
|
|
|
|
DI != DE; ++DI, ++FI)
|
|
|
|
assert(*DI == *FI);
|
2014-02-01 08:04:45 +08:00
|
|
|
#endif
|
2014-07-08 10:24:27 +08:00
|
|
|
Arg = Builder.CreateBitCast(Arg, LastParamTy);
|
|
|
|
}
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(IRFunctionArgs.hasInallocaArg());
|
|
|
|
IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg;
|
2014-02-01 08:04:45 +08:00
|
|
|
}
|
|
|
|
|
2013-06-21 20:45:15 +08:00
|
|
|
if (!CallArgs.getCleanupsToDeactivate().empty())
|
|
|
|
deactivateArgCleanupsBeforeCall(*this, CallArgs);
|
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// If the callee is a bitcast of a function to a varargs pointer to function
|
|
|
|
// type, check to see if we can remove the bitcast. This handles some cases
|
|
|
|
// with unprototyped functions.
|
|
|
|
if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Callee))
|
|
|
|
if (llvm::Function *CalleeF = dyn_cast<llvm::Function>(CE->getOperand(0))) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::PointerType *CurPT=cast<llvm::PointerType>(Callee->getType());
|
|
|
|
llvm::FunctionType *CurFT =
|
2009-06-13 08:26:38 +08:00
|
|
|
cast<llvm::FunctionType>(CurPT->getElementType());
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::FunctionType *ActualFT = CalleeF->getFunctionType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
if (CE->getOpcode() == llvm::Instruction::BitCast &&
|
|
|
|
ActualFT->getReturnType() == CurFT->getReturnType() &&
|
2009-06-23 09:38:41 +08:00
|
|
|
ActualFT->getNumParams() == CurFT->getNumParams() &&
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
ActualFT->getNumParams() == IRCallArgs.size() &&
|
2011-03-02 01:28:13 +08:00
|
|
|
(CurFT->isVarArg() || !ActualFT->isVarArg())) {
|
2009-06-13 08:26:38 +08:00
|
|
|
bool ArgsMatch = true;
|
|
|
|
for (unsigned i = 0, e = ActualFT->getNumParams(); i != e; ++i)
|
|
|
|
if (ActualFT->getParamType(i) != CurFT->getParamType(i)) {
|
|
|
|
ArgsMatch = false;
|
|
|
|
break;
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// Strip the cast if we can get away with it. This is a nice cleanup,
|
|
|
|
// but also allows us to inline the function at -O0 if it is marked
|
|
|
|
// always_inline.
|
|
|
|
if (ArgsMatch)
|
|
|
|
Callee = CalleeF;
|
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
assert(IRCallArgs.size() == IRFuncTy->getNumParams() || IRFuncTy->isVarArg());
|
|
|
|
for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
|
|
|
|
// Inalloca argument can have different type.
|
|
|
|
if (IRFunctionArgs.hasInallocaArg() &&
|
|
|
|
i == IRFunctionArgs.getInallocaArgNo())
|
|
|
|
continue;
|
|
|
|
if (i < IRFuncTy->getNumParams())
|
|
|
|
assert(IRCallArgs[i]->getType() == IRFuncTy->getParamType(i));
|
|
|
|
}
|
|
|
|
|
2009-09-12 08:59:20 +08:00
|
|
|
unsigned CallingConv;
|
2008-09-26 05:02:23 +08:00
|
|
|
CodeGen::AttributeListType AttributeList;
|
2013-02-22 08:13:35 +08:00
|
|
|
CGM.ConstructAttributeList(CallInfo, TargetDecl, AttributeList,
|
|
|
|
CallingConv, true);
|
2012-12-08 07:17:26 +08:00
|
|
|
llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(),
|
2013-02-22 08:13:35 +08:00
|
|
|
AttributeList);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
llvm::BasicBlock *InvokeDest = nullptr;
|
2012-12-30 18:32:17 +08:00
|
|
|
if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
|
2015-02-11 08:00:21 +08:00
|
|
|
llvm::Attribute::NoUnwind) ||
|
|
|
|
currentFunctionUsesSEHTry())
|
2010-07-06 09:34:17 +08:00
|
|
|
InvokeDest = getInvokeDest();
|
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
llvm::CallSite CS;
|
2010-07-06 09:34:17 +08:00
|
|
|
if (!InvokeDest) {
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
CS = Builder.CreateCall(Callee, IRCallArgs);
|
2009-02-24 01:26:39 +08:00
|
|
|
} else {
|
|
|
|
llvm::BasicBlock *Cont = createBasicBlock("invoke.cont");
|
CGCall: Factor out the logic mapping call arguments to LLVM IR arguments.
Summary:
This refactoring introduces ClangToLLVMArgMapping class, which
encapsulates the information about the order in which function arguments listed
in CGFunctionInfo should be passed to actual LLVM IR function, such as:
1) positions of sret, if there is any
2) position of inalloca argument, if there is any
3) position of helper padding argument for each call argument
4) positions of regular argument (there can be many if it's expanded).
Simplify several related methods (ConstructAttributeList, EmitFunctionProlog
and EmitCall): now they don't have to maintain iterators over the list
of LLVM IR function arguments, dealing with all the sret/inalloca/this complexities,
and just use expected positions of LLVM IR arguments stored in ClangToLLVMArgMapping.
This may increase the running time of EmitFunctionProlog, as we have to traverse
expandable arguments twice, but in further refactoring we will be able
to speed up EmitCall by passing already calculated CallArgsToIRArgsMapping to
ConstructAttributeList, thus avoiding traversing expandable argument there.
No functionality change.
Test Plan: regression test suite
Reviewers: majnemer, rnk
Reviewed By: rnk
Subscribers: cfe-commits, rjmccall, timurrrr
Differential Revision: http://reviews.llvm.org/D4938
llvm-svn: 216251
2014-08-22 09:06:06 +08:00
|
|
|
CS = Builder.CreateInvoke(Callee, Cont, InvokeDest, IRCallArgs);
|
2009-02-24 01:26:39 +08:00
|
|
|
EmitBlock(Cont);
|
2009-02-21 02:54:31 +08:00
|
|
|
}
|
2010-06-30 00:40:28 +08:00
|
|
|
if (callOrInvoke)
|
2010-05-02 21:41:58 +08:00
|
|
|
*callOrInvoke = CS.getInstruction();
|
2009-02-21 02:54:31 +08:00
|
|
|
|
2014-05-21 01:12:51 +08:00
|
|
|
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
|
|
|
|
!CS.hasFnAttr(llvm::Attribute::NoInline))
|
|
|
|
Attrs =
|
|
|
|
Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
|
|
|
|
llvm::Attribute::AlwaysInline);
|
|
|
|
|
2015-02-12 05:40:48 +08:00
|
|
|
// Disable inlining inside SEH __try blocks.
|
2015-02-13 07:40:45 +08:00
|
|
|
if (isSEHTryScope())
|
2015-02-12 05:40:48 +08:00
|
|
|
Attrs =
|
|
|
|
Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
|
|
|
|
llvm::Attribute::NoInline);
|
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
CS.setAttributes(Attrs);
|
2009-09-12 08:59:20 +08:00
|
|
|
CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
|
2009-03-02 12:32:35 +08:00
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
2012-03-11 15:00:24 +08:00
|
|
|
if (CGM.getLangOpts().ObjCAutoRefCount)
|
2012-02-16 08:57:37 +08:00
|
|
|
AddObjCARCExceptionMetadata(CS.getInstruction());
|
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
// If the call doesn't return, finish the basic block and clear the
|
|
|
|
// insertion point; this allows the rest of IRgen to discard
|
|
|
|
// unreachable code.
|
|
|
|
if (CS.doesNotReturn()) {
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
Builder.ClearInsertionPoint();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: For now, emit a dummy basic block because expr emitters in
|
|
|
|
// generally are not ready to handle emitting expressions at unreachable
|
|
|
|
// points.
|
2009-03-02 12:32:35 +08:00
|
|
|
EnsureInsertPoint();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
// Return a reasonable RValue.
|
|
|
|
return GetUndefRValue(RetTy);
|
2009-09-09 23:08:12 +08:00
|
|
|
}
|
2009-03-02 12:32:35 +08:00
|
|
|
|
|
|
|
llvm::Instruction *CI = CS.getInstruction();
|
2009-10-05 21:47:21 +08:00
|
|
|
if (Builder.isNamePreserving() && !CI->getType()->isVoidTy())
|
2008-09-10 07:27:19 +08:00
|
|
|
CI->setName("call");
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// Emit any writebacks immediately. Arguably this should happen
|
|
|
|
// after any return-value munging.
|
|
|
|
if (CallArgs.hasWritebacks())
|
|
|
|
emitWritebacks(*this, CallArgs);
|
|
|
|
|
2014-02-01 08:04:45 +08:00
|
|
|
// The stack cleanup for inalloca arguments has to run out of the normal
|
|
|
|
// lexical order, so deactivate it and run it manually here.
|
|
|
|
CallArgs.freeArgumentMemory(*this);
|
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
RValue Ret = [&] {
|
|
|
|
switch (RetAI.getKind()) {
|
|
|
|
case ABIArgInfo::InAlloca:
|
|
|
|
case ABIArgInfo::Indirect:
|
|
|
|
return convertTempToRValue(SRetPtr, RetTy, SourceLocation());
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
// If we are ignoring an argument that had a result, make sure to
|
|
|
|
// construct the appropriate return value for our caller.
|
|
|
|
return GetUndefRValue(RetTy);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
case ABIArgInfo::Extend:
|
|
|
|
case ABIArgInfo::Direct: {
|
|
|
|
llvm::Type *RetIRTy = ConvertType(RetTy);
|
|
|
|
if (RetAI.getCoerceToType() == RetIRTy && RetAI.getDirectOffset() == 0) {
|
|
|
|
switch (getEvaluationKind(RetTy)) {
|
|
|
|
case TEK_Complex: {
|
|
|
|
llvm::Value *Real = Builder.CreateExtractValue(CI, 0);
|
|
|
|
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
|
|
|
|
return RValue::getComplex(std::make_pair(Real, Imag));
|
|
|
|
}
|
|
|
|
case TEK_Aggregate: {
|
|
|
|
llvm::Value *DestPtr = ReturnValue.getValue();
|
|
|
|
bool DestIsVolatile = ReturnValue.isVolatile();
|
2009-01-27 05:26:08 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
if (!DestPtr) {
|
|
|
|
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
|
|
|
|
DestIsVolatile = false;
|
|
|
|
}
|
|
|
|
BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false);
|
|
|
|
return RValue::getAggregate(DestPtr);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
}
|
2014-09-26 13:04:30 +08:00
|
|
|
case TEK_Scalar: {
|
|
|
|
// If the argument doesn't match, perform a bitcast to coerce it. This
|
|
|
|
// can happen due to trivial type mismatches.
|
|
|
|
llvm::Value *V = CI;
|
|
|
|
if (V->getType() != RetIRTy)
|
|
|
|
V = Builder.CreateBitCast(V, RetIRTy);
|
|
|
|
return RValue::get(V);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
llvm_unreachable("bad evaluation kind");
|
2013-03-08 05:37:08 +08:00
|
|
|
}
|
2014-09-26 13:04:30 +08:00
|
|
|
|
|
|
|
llvm::Value *DestPtr = ReturnValue.getValue();
|
|
|
|
bool DestIsVolatile = ReturnValue.isVolatile();
|
|
|
|
|
|
|
|
if (!DestPtr) {
|
|
|
|
DestPtr = CreateMemTemp(RetTy, "coerce");
|
|
|
|
DestIsVolatile = false;
|
2013-03-08 05:37:08 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
llvm::Value *StorePtr = DestPtr;
|
|
|
|
if (unsigned Offs = RetAI.getDirectOffset()) {
|
|
|
|
StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy());
|
|
|
|
StorePtr = Builder.CreateConstGEP1_32(StorePtr, Offs);
|
|
|
|
StorePtr = Builder.CreateBitCast(StorePtr,
|
|
|
|
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
|
|
|
|
}
|
|
|
|
CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
|
2009-12-25 04:40:36 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
case ABIArgInfo::Expand:
|
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2010-07-30 12:02:24 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
llvm_unreachable("Unhandled ABIArgInfo::Kind");
|
|
|
|
} ();
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
if (Ret.isScalar() && TargetDecl) {
|
|
|
|
if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) {
|
|
|
|
llvm::Value *OffsetValue = nullptr;
|
|
|
|
if (const auto *Offset = AA->getOffset())
|
|
|
|
OffsetValue = EmitScalarExpr(Offset);
|
|
|
|
|
|
|
|
llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
|
|
|
|
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
|
|
|
|
EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
|
|
|
|
OffsetValue);
|
|
|
|
}
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2014-09-26 13:04:30 +08:00
|
|
|
return Ret;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-02-11 04:44:09 +08:00
|
|
|
|
|
|
|
/* VarArg handling */
|
|
|
|
|
|
|
|
llvm::Value *CodeGenFunction::EmitVAArg(llvm::Value *VAListAddr, QualType Ty) {
|
|
|
|
return CGM.getTypes().getABIInfo().EmitVAArg(VAListAddr, Ty, *this);
|
|
|
|
}
|