2011-07-10 01:41:47 +08:00
|
|
|
//===--- CGCall.cpp - Encapsulate calling convention details ----*- C++ -*-===//
|
2008-09-09 05:33:45 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// These classes wrap the information about a call or function
|
|
|
|
// definition used to handle ABI compliancy.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "CGCall.h"
|
2010-08-31 15:33:07 +08:00
|
|
|
#include "CGCXXABI.h"
|
2010-06-30 00:40:28 +08:00
|
|
|
#include "ABIInfo.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "CodeGenFunction.h"
|
2008-09-10 08:41:16 +08:00
|
|
|
#include "CodeGenModule.h"
|
2012-02-17 11:33:10 +08:00
|
|
|
#include "TargetInfo.h"
|
2008-10-14 01:02:26 +08:00
|
|
|
#include "clang/Basic/TargetInfo.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "clang/AST/Decl.h"
|
2009-04-04 06:48:58 +08:00
|
|
|
#include "clang/AST/DeclCXX.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
#include "clang/AST/DeclObjC.h"
|
2010-06-16 07:19:56 +08:00
|
|
|
#include "clang/Frontend/CodeGenOptions.h"
|
2008-09-24 09:01:36 +08:00
|
|
|
#include "llvm/Attributes.h"
|
2009-03-02 12:32:35 +08:00
|
|
|
#include "llvm/Support/CallSite.h"
|
2009-01-27 09:36:03 +08:00
|
|
|
#include "llvm/Target/TargetData.h"
|
2011-06-16 07:02:42 +08:00
|
|
|
#include "llvm/InlineAsm.h"
|
2011-06-16 06:09:18 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2008-09-09 05:33:45 +08:00
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
|
|
|
/***/
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
/// Map a Clang AST calling convention onto the LLVM calling-convention
/// value used when emitting IR.  Any convention without an explicit
/// mapping below is lowered as the C calling convention.
static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) {
  unsigned llvmCC = llvm::CallingConv::C;

  switch (CC) {
  case CC_X86StdCall:
    llvmCC = llvm::CallingConv::X86_StdCall;
    break;
  case CC_X86FastCall:
    llvmCC = llvm::CallingConv::X86_FastCall;
    break;
  case CC_X86ThisCall:
    llvmCC = llvm::CallingConv::X86_ThisCall;
    break;
  case CC_AAPCS:
    llvmCC = llvm::CallingConv::ARM_AAPCS;
    break;
  case CC_AAPCS_VFP:
    llvmCC = llvm::CallingConv::ARM_AAPCS_VFP;
    break;
  default:
    // Everything else keeps the C convention chosen above.
    // TODO: add support for CC_X86Pascal to llvm
    break;
  }

  return llvmCC;
}
|
|
|
|
|
2010-02-24 15:14:12 +08:00
|
|
|
/// Derives the 'this' type for codegen purposes, i.e. ignoring method
|
|
|
|
/// qualification.
|
|
|
|
/// FIXME: address space qualification?
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) {
|
|
|
|
QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
|
|
|
|
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the canonical formal type of the given C++ method.
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQual<FunctionProtoType> GetFormalType(const CXXMethodDecl *MD) {
|
|
|
|
return MD->getType()->getCanonicalTypeUnqualified()
|
|
|
|
.getAs<FunctionProtoType>();
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the "extra-canonicalized" return type, which discards
|
|
|
|
/// qualifiers on the return type. Codegen doesn't care about them,
|
|
|
|
/// and it makes ABI code a little easier to be able to assume that
|
|
|
|
/// all parameter and return types are top-level unqualified.
|
2010-02-26 08:48:12 +08:00
|
|
|
static CanQualType GetReturnType(QualType RetTy) {
|
|
|
|
return RetTy->getCanonicalTypeUnqualified().getUnqualifiedType();
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a value of the
|
|
|
|
/// given unprototyped function type.
|
2010-02-24 15:14:12 +08:00
|
|
|
const CGFunctionInfo &
|
2012-02-17 11:33:10 +08:00
|
|
|
CodeGenTypes::arrangeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
|
|
|
|
// When translating an unprototyped function type, always use a
|
|
|
|
// variadic type.
|
|
|
|
return arrangeFunctionType(FTNP->getResultType().getUnqualifiedType(),
|
|
|
|
ArrayRef<CanQualType>(),
|
|
|
|
FTNP->getExtInfo(),
|
|
|
|
RequiredArgs(0));
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Arrange the argument and result information for a value of the
|
|
|
|
/// given function type, on top of any implicit parameters already
|
|
|
|
/// stored.
|
|
|
|
static const CGFunctionInfo &arrangeFunctionType(CodeGenTypes &CGT,
|
|
|
|
SmallVectorImpl<CanQualType> &argTypes,
|
2011-07-10 01:41:47 +08:00
|
|
|
CanQual<FunctionProtoType> FTP) {
|
2012-02-17 11:33:10 +08:00
|
|
|
RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size());
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy.
|
2008-09-10 12:01:49 +08:00
|
|
|
for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
|
2012-02-17 11:33:10 +08:00
|
|
|
argTypes.push_back(FTP->getArgType(i));
|
|
|
|
CanQualType resultType = FTP->getResultType().getUnqualifiedType();
|
|
|
|
return CGT.arrangeFunctionType(resultType, argTypes,
|
|
|
|
FTP->getExtInfo(), required);
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a value of the
|
|
|
|
/// given function type.
|
2010-02-24 15:14:12 +08:00
|
|
|
const CGFunctionInfo &
|
2012-02-17 11:33:10 +08:00
|
|
|
CodeGenTypes::arrangeFunctionType(CanQual<FunctionProtoType> FTP) {
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
|
|
|
return ::arrangeFunctionType(*this, argTypes, FTP);
|
2009-09-12 06:24:53 +08:00
|
|
|
}
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
/// Determine the AST calling convention requested by the attributes on
/// \p D.  Attributes are checked in a fixed order (stdcall, fastcall,
/// thiscall, pascal, pcs) and the first match wins; with none of them
/// present the result is CC_C.
static CallingConv getCallingConventionForDecl(const Decl *D) {
  CallingConv convention = CC_C;

  if (D->hasAttr<StdCallAttr>()) {
    convention = CC_X86StdCall;
  } else if (D->hasAttr<FastCallAttr>()) {
    convention = CC_X86FastCall;
  } else if (D->hasAttr<ThisCallAttr>()) {
    convention = CC_X86ThisCall;
  } else if (D->hasAttr<PascalAttr>()) {
    convention = CC_X86Pascal;
  } else if (PcsAttr *PCS = D->getAttr<PcsAttr>()) {
    // A pcs attribute is either AAPCS or treated as AAPCS-VFP.
    if (PCS->getPCS() == PcsAttr::AAPCS)
      convention = CC_AAPCS;
    else
      convention = CC_AAPCS_VFP;
  }

  return convention;
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a call to an
|
|
|
|
/// unknown C++ non-static member function of the given abstract type.
|
|
|
|
/// The member function must be an ordinary function, i.e. not a
|
|
|
|
/// constructor or destructor.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
|
|
|
|
const FunctionProtoType *FTP) {
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2009-10-04 03:43:08 +08:00
|
|
|
// Add the 'this' pointer.
|
2012-02-17 11:33:10 +08:00
|
|
|
argTypes.push_back(GetThisType(Context, RD));
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
return ::arrangeFunctionType(*this, argTypes,
|
2011-03-03 05:36:49 +08:00
|
|
|
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>());
|
2009-10-04 03:43:08 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a declaration or
|
|
|
|
/// definition of the given C++ non-static member function. The
|
|
|
|
/// member function must be an ordinary function, i.e. not a
|
|
|
|
/// constructor or destructor.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
|
2010-09-03 09:26:39 +08:00
|
|
|
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for contructors!");
|
|
|
|
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
CanQual<FunctionProtoType> prototype = GetFormalType(MD);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
if (MD->isInstance()) {
|
|
|
|
// The abstract case is perfectly fine.
|
|
|
|
return arrangeCXXMethodType(MD->getParent(), prototype.getTypePtr());
|
|
|
|
}
|
|
|
|
|
|
|
|
return arrangeFunctionType(prototype);
|
2009-04-04 06:48:58 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a declaration
|
|
|
|
/// or definition to the given constructor variant.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXConstructorDeclaration(const CXXConstructorDecl *D,
|
|
|
|
CXXCtorType ctorKind) {
|
|
|
|
SmallVector<CanQualType, 16> argTypes;
|
|
|
|
argTypes.push_back(GetThisType(Context, D->getParent()));
|
|
|
|
CanQualType resultType = Context.VoidTy;
|
2009-11-25 11:15:49 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
TheCXXABI.BuildConstructorSignature(D, ctorKind, resultType, argTypes);
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2010-08-31 15:33:07 +08:00
|
|
|
CanQual<FunctionProtoType> FTP = GetFormalType(D);
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size());
|
|
|
|
|
2010-08-31 15:33:07 +08:00
|
|
|
// Add the formal parameters.
|
|
|
|
for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
|
2012-02-17 11:33:10 +08:00
|
|
|
argTypes.push_back(FTP->getArgType(i));
|
2010-08-31 15:33:07 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
return arrangeFunctionType(resultType, argTypes, FTP->getExtInfo(), required);
|
2009-11-25 11:15:49 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a declaration,
|
|
|
|
/// definition, or call to the given destructor variant. It so
|
|
|
|
/// happens that all three cases produce the same information.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeCXXDestructor(const CXXDestructorDecl *D,
|
|
|
|
CXXDtorType dtorKind) {
|
|
|
|
SmallVector<CanQualType, 2> argTypes;
|
|
|
|
argTypes.push_back(GetThisType(Context, D->getParent()));
|
|
|
|
CanQualType resultType = Context.VoidTy;
|
2010-08-31 15:33:07 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
TheCXXABI.BuildDestructorSignature(D, dtorKind, resultType, argTypes);
|
2010-02-24 15:14:12 +08:00
|
|
|
|
2010-08-31 15:33:07 +08:00
|
|
|
CanQual<FunctionProtoType> FTP = GetFormalType(D);
|
|
|
|
assert(FTP->getNumArgs() == 0 && "dtor with formal parameters");
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
return arrangeFunctionType(resultType, argTypes, FTP->getExtInfo(),
|
|
|
|
RequiredArgs::All);
|
2009-11-25 11:15:49 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for the declaration or
|
|
|
|
/// definition of the given function.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
|
2009-05-13 04:27:19 +08:00
|
|
|
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
|
2009-04-04 06:48:58 +08:00
|
|
|
if (MD->isInstance())
|
2012-02-17 11:33:10 +08:00
|
|
|
return arrangeCXXMethodDeclaration(MD);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
|
2012-02-17 11:33:10 +08:00
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
assert(isa<FunctionType>(FTy));
|
2012-02-17 11:33:10 +08:00
|
|
|
|
|
|
|
// When declaring a function without a prototype, always use a
|
|
|
|
// non-variadic type.
|
|
|
|
if (isa<FunctionNoProtoType>(FTy)) {
|
|
|
|
CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
|
|
|
|
return arrangeFunctionType(noProto->getResultType(),
|
|
|
|
ArrayRef<CanQualType>(),
|
|
|
|
noProto->getExtInfo(),
|
|
|
|
RequiredArgs::All);
|
|
|
|
}
|
|
|
|
|
2010-02-26 08:48:12 +08:00
|
|
|
assert(isa<FunctionProtoType>(FTy));
|
2012-02-17 11:33:10 +08:00
|
|
|
return arrangeFunctionType(FTy.getAs<FunctionProtoType>());
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for the declaration or
|
|
|
|
/// definition of an Objective-C method.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD) {
|
|
|
|
// It happens that this is the same as a call with no optional
|
|
|
|
// arguments, except also using the formal 'self' type.
|
|
|
|
return arrangeObjCMessageSendSignature(MD, MD->getSelfDecl()->getType());
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Arrange the argument and result information for the function type
|
|
|
|
/// through which to perform a send to the given Objective-C method,
|
|
|
|
/// using the given receiver type. The receiver type is not always
|
|
|
|
/// the 'self' type of the method or even an Objective-C pointer type.
|
|
|
|
/// This is *not* the right method for actually performing such a
|
|
|
|
/// message send, due to the possibility of optional arguments.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
|
|
|
|
QualType receiverType) {
|
|
|
|
SmallVector<CanQualType, 16> argTys;
|
|
|
|
argTys.push_back(Context.getCanonicalParamType(receiverType));
|
|
|
|
argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
|
2009-02-03 07:23:47 +08:00
|
|
|
// FIXME: Kill copy?
|
2011-10-03 14:37:04 +08:00
|
|
|
for (ObjCMethodDecl::param_const_iterator i = MD->param_begin(),
|
2010-02-24 15:14:12 +08:00
|
|
|
e = MD->param_end(); i != e; ++i) {
|
2012-02-17 11:33:10 +08:00
|
|
|
argTys.push_back(Context.getCanonicalParamType((*i)->getType()));
|
2010-02-24 15:14:12 +08:00
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
FunctionType::ExtInfo einfo;
|
|
|
|
einfo = einfo.withCallingConv(getCallingConventionForDecl(MD));
|
|
|
|
|
|
|
|
if (getContext().getLangOptions().ObjCAutoRefCount &&
|
|
|
|
MD->hasAttr<NSReturnsRetainedAttr>())
|
|
|
|
einfo = einfo.withProducesResult(true);
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
RequiredArgs required =
|
|
|
|
(MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
|
|
|
|
|
|
|
|
return arrangeFunctionType(GetReturnType(MD->getResultType()), argTys,
|
|
|
|
einfo, required);
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for the function named
/// by \p GD, dispatching on whether it is a constructor, a destructor,
/// or any other function.
const CGFunctionInfo &
CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
  // FIXME: Do we need to handle ObjCMethodDecl?
  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());

  // Constructors and destructors come in several variants; the variant
  // is carried by the GlobalDecl.
  if (isa<CXXConstructorDecl>(FD))
    return arrangeCXXConstructorDeclaration(cast<CXXConstructorDecl>(FD),
                                            GD.getCtorType());

  if (isa<CXXDestructorDecl>(FD))
    return arrangeCXXDestructor(cast<CXXDestructorDecl>(FD),
                                GD.getDtorType());

  return arrangeFunctionDeclaration(FD);
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Figure out the rules for calling a function with the given formal
|
|
|
|
/// type using the given arguments. The arguments are necessary
|
|
|
|
/// because the function might be unprototyped, in which case it's
|
|
|
|
/// target-dependent in crazy ways.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeFunctionCall(const CallArgList &args,
|
|
|
|
const FunctionType *fnType) {
|
|
|
|
RequiredArgs required = RequiredArgs::All;
|
|
|
|
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) {
|
|
|
|
if (proto->isVariadic())
|
|
|
|
required = RequiredArgs(proto->getNumArgs());
|
|
|
|
} else if (CGM.getTargetCodeGenInfo()
|
|
|
|
.isNoProtoCallVariadic(args, cast<FunctionNoProtoType>(fnType))) {
|
|
|
|
required = RequiredArgs(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return arrangeFunctionCall(fnType->getResultType(), args,
|
|
|
|
fnType->getExtInfo(), required);
|
2010-02-06 10:44:09 +08:00
|
|
|
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a call with the
/// given result type and actual arguments.  Each argument's type is
/// canonicalized as a parameter type and the result type has its
/// qualifiers stripped before the common arrangement routine is used.
const CGFunctionInfo &
CodeGenTypes::arrangeFunctionCall(QualType resultType,
                                  const CallArgList &args,
                                  const FunctionType::ExtInfo &info,
                                  RequiredArgs required) {
  // FIXME: Kill copy.
  SmallVector<CanQualType, 16> canonicalArgs;
  CallArgList::const_iterator arg = args.begin();
  const CallArgList::const_iterator argEnd = args.end();
  while (arg != argEnd) {
    canonicalArgs.push_back(Context.getCanonicalParamType(arg->Ty));
    ++arg;
  }

  CanQualType canonicalResult = GetReturnType(resultType);
  return arrangeFunctionType(canonicalResult, canonicalArgs, info, required);
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for a function declared
/// with the given result type and parameter declarations.  When
/// \p isVariadic is set, exactly the listed parameters are required;
/// otherwise all arguments are.
const CGFunctionInfo &
CodeGenTypes::arrangeFunctionDeclaration(QualType resultType,
                                         const FunctionArgList &args,
                                         const FunctionType::ExtInfo &info,
                                         bool isVariadic) {
  // FIXME: Kill copy.
  SmallVector<CanQualType, 16> canonicalParams;
  FunctionArgList::const_iterator param = args.begin();
  const FunctionArgList::const_iterator paramEnd = args.end();
  while (param != paramEnd) {
    canonicalParams.push_back(
      Context.getCanonicalParamType((*param)->getType()));
    ++param;
  }

  RequiredArgs required = RequiredArgs::All;
  if (isVariadic)
    required = RequiredArgs(args.size());

  CanQualType canonicalResult = GetReturnType(resultType);
  return arrangeFunctionType(canonicalResult, canonicalParams, info, required);
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the information for a function taking no arguments and
/// returning void, with default extended function info.
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
  CanQualType voidTy = getContext().VoidTy;
  ArrayRef<CanQualType> noArgs;
  FunctionType::ExtInfo defaultInfo;
  return arrangeFunctionType(voidTy, noArgs, defaultInfo, RequiredArgs::All);
}
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Arrange the argument and result information for an abstract value
|
|
|
|
/// of a given function type. This is the method which all of the
|
|
|
|
/// above functions ultimately defer to.
|
|
|
|
const CGFunctionInfo &
|
|
|
|
CodeGenTypes::arrangeFunctionType(CanQualType resultType,
|
|
|
|
ArrayRef<CanQualType> argTypes,
|
|
|
|
const FunctionType::ExtInfo &info,
|
|
|
|
RequiredArgs required) {
|
2010-02-26 08:48:12 +08:00
|
|
|
#ifndef NDEBUG
|
2012-02-17 11:33:10 +08:00
|
|
|
for (ArrayRef<CanQualType>::const_iterator
|
|
|
|
I = argTypes.begin(), E = argTypes.end(); I != E; ++I)
|
2010-02-26 08:48:12 +08:00
|
|
|
assert(I->isCanonicalAsParam());
|
|
|
|
#endif
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
|
2010-02-06 05:31:56 +08:00
|
|
|
|
2009-02-03 08:07:12 +08:00
|
|
|
// Lookup or create unique function info.
|
|
|
|
llvm::FoldingSetNodeID ID;
|
2012-02-17 11:33:10 +08:00
|
|
|
CGFunctionInfo::Profile(ID, info, required, resultType, argTypes);
|
2009-02-03 08:07:12 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
void *insertPos = 0;
|
|
|
|
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
|
2009-02-03 08:07:12 +08:00
|
|
|
if (FI)
|
|
|
|
return *FI;
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
// Construct the function info. We co-allocate the ArgInfos.
|
|
|
|
FI = CGFunctionInfo::create(CC, info, resultType, argTypes, required);
|
|
|
|
FunctionInfos.InsertNode(FI, insertPos);
|
2009-02-03 07:23:47 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
bool inserted = FunctionsBeingProcessed.insert(FI); (void)inserted;
|
|
|
|
assert(inserted && "Recursively being processed?");
|
2011-07-15 13:16:14 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
// Compute ABI information.
|
2010-07-29 10:31:05 +08:00
|
|
|
getABIInfo().computeInfo(*FI);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// Loop over all of the computed argument and return value info. If any of
|
|
|
|
// them are direct or extend without a specified coerce type, specify the
|
|
|
|
// default now.
|
2012-02-17 11:33:10 +08:00
|
|
|
ABIArgInfo &retInfo = FI->getReturnInfo();
|
|
|
|
if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == 0)
|
|
|
|
retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
for (CGFunctionInfo::arg_iterator I = FI->arg_begin(), E = FI->arg_end();
|
|
|
|
I != E; ++I)
|
|
|
|
if (I->info.canHaveCoerceToType() && I->info.getCoerceToType() == 0)
|
2011-07-10 01:41:47 +08:00
|
|
|
I->info.setCoerceToType(ConvertType(I->type));
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
|
|
|
|
assert(erased && "Not in set?");
|
2011-07-15 14:41:05 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
return *FI;
|
2008-09-09 05:33:45 +08:00
|
|
|
}
|
2008-09-10 07:27:19 +08:00
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
/// Allocate and initialize a CGFunctionInfo for the given signature.
/// The object and its ArgInfo array live in a single allocation; slot 0
/// of the array describes the result and slots 1..N the arguments.
/// Only the types are filled in here.
CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
                                       const FunctionType::ExtInfo &info,
                                       CanQualType resultType,
                                       ArrayRef<CanQualType> argTypes,
                                       RequiredArgs required) {
  // One ArgInfo per argument, plus one for the result.
  const size_t slotCount = argTypes.size() + 1;
  void *storage = operator new(sizeof(CGFunctionInfo) +
                               sizeof(ArgInfo) * slotCount);
  CGFunctionInfo *FI = new(storage) CGFunctionInfo();

  // Calling-convention data; the effective convention starts out equal
  // to the requested one.
  FI->CallingConvention = llvmCC;
  FI->EffectiveCallingConvention = llvmCC;
  FI->ASTCallingConvention = info.getCC();

  // Remaining properties copied from the extended function info.
  FI->NoReturn = info.getNoReturn();
  FI->ReturnsRetained = info.getProducesResult();
  FI->Required = required;
  FI->HasRegParm = info.getHasRegParm();
  FI->RegParm = info.getRegParm();
  FI->NumArgs = argTypes.size();

  // Fill in the types: result first, then each argument in order.
  ArgInfo *slots = FI->getArgsBuffer();
  slots[0].type = resultType;
  for (unsigned idx = 0, count = argTypes.size(); idx != count; ++idx)
    slots[idx + 1].type = argTypes[idx];

  return FI;
}
|
|
|
|
|
|
|
|
/***/
|
|
|
|
|
2011-05-15 10:19:42 +08:00
|
|
|
void CodeGenTypes::GetExpandedTypes(QualType type,
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVectorImpl<llvm::Type*> &expandedTypes) {
|
2011-08-03 13:58:22 +08:00
|
|
|
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(type)) {
|
|
|
|
uint64_t NumElts = AT->getSize().getZExtValue();
|
|
|
|
for (uint64_t Elt = 0; Elt < NumElts; ++Elt)
|
|
|
|
GetExpandedTypes(AT->getElementType(), expandedTypes);
|
|
|
|
} else if (const RecordType *RT = type->getAsStructureType()) {
|
|
|
|
const RecordDecl *RD = RT->getDecl();
|
|
|
|
assert(!RD->hasFlexibleArrayMember() &&
|
|
|
|
"Cannot expand structure with flexible array.");
|
|
|
|
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
|
2009-06-30 10:36:12 +08:00
|
|
|
i != e; ++i) {
|
2011-08-03 13:58:22 +08:00
|
|
|
const FieldDecl *FD = *i;
|
|
|
|
assert(!FD->isBitField() &&
|
|
|
|
"Cannot expand structure with bit-field members.");
|
|
|
|
GetExpandedTypes(FD->getType(), expandedTypes);
|
|
|
|
}
|
|
|
|
} else if (const ComplexType *CT = type->getAs<ComplexType>()) {
|
|
|
|
llvm::Type *EltTy = ConvertType(CT->getElementType());
|
|
|
|
expandedTypes.push_back(EltTy);
|
|
|
|
expandedTypes.push_back(EltTy);
|
|
|
|
} else
|
|
|
|
expandedTypes.push_back(ConvertType(type));
|
2008-09-17 08:51:38 +08:00
|
|
|
}
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
/// Store consecutive LLVM function arguments, starting at \p AI, into
/// the memory designated by \p LV, recursing through constant arrays,
/// structure fields and the two parts of a complex value.  Returns the
/// iterator just past the last argument consumed.
llvm::Function::arg_iterator
CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
                                    llvm::Function::arg_iterator AI) {
  assert(LV.isSimple() &&
         "Unexpected non-simple lvalue during struct expansion.");
  llvm::Value *Addr = LV.getAddress();

  // Constant-size array: reassemble element by element.
  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
    const unsigned NumElts = AT->getSize().getZExtValue();
    QualType EltTy = AT->getElementType();
    for (unsigned Elt = 0; Elt != NumElts; ++Elt) {
      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, Elt);
      LValue EltLV = MakeAddrLValue(EltAddr, EltTy);
      AI = ExpandTypeFromArgs(EltTy, EltLV, AI);
    }
    return AI;
  }

  // Structure: reassemble field by field, in declaration order.
  if (const RecordType *RT = Ty->getAsStructureType()) {
    RecordDecl *RD = RT->getDecl();
    RecordDecl::field_iterator field = RD->field_begin();
    const RecordDecl::field_iterator fieldEnd = RD->field_end();
    for (; field != fieldEnd; ++field) {
      FieldDecl *FD = *field;

      // FIXME: What are the right qualifiers here?
      LValue FieldLV = EmitLValueForField(Addr, FD, 0);
      AI = ExpandTypeFromArgs(FD->getType(), FieldLV, AI);
    }
    return AI;
  }

  // Complex: the real part arrives first, then the imaginary part.
  if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
    QualType EltTy = CT->getElementType();
    llvm::Value *RealAddr = Builder.CreateStructGEP(Addr, 0, "real");
    EmitStoreThroughLValue(RValue::get(AI++), MakeAddrLValue(RealAddr, EltTy));
    llvm::Value *ImagAddr = Builder.CreateStructGEP(Addr, 1, "imag");
    EmitStoreThroughLValue(RValue::get(AI++), MakeAddrLValue(ImagAddr, EltTy));
    return AI;
  }

  // Anything else occupies exactly one argument.
  EmitStoreThroughLValue(RValue::get(AI++), LV);
  return AI;
}
|
|
|
|
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
/// EnterStructPointerForCoercedAccess - Given a struct pointer that we are
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
/// accessing some number of bytes out of it, try to gep into the struct to get
|
|
|
|
/// at its inner goodness. Dive as deep as possible without entering an element
|
|
|
|
/// with an in-memory size smaller than DstSize.
|
|
|
|
static llvm::Value *
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for a future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
EnterStructPointerForCoercedAccess(llvm::Value *SrcPtr,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::StructType *SrcSTy,
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for a future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
uint64_t DstSize, CodeGenFunction &CGF) {
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// We can't dive into a zero-element struct.
|
|
|
|
if (SrcSTy->getNumElements() == 0) return SrcPtr;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *FirstElt = SrcSTy->getElementType(0);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// If the first elt is at least as large as what we're looking for, or if the
|
|
|
|
// first element is the same size as the whole struct, we can enter it.
|
2010-10-19 14:39:39 +08:00
|
|
|
uint64_t FirstEltSize =
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
CGF.CGM.getTargetData().getTypeAllocSize(FirstElt);
|
2010-10-19 14:39:39 +08:00
|
|
|
if (FirstEltSize < DstSize &&
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
FirstEltSize < CGF.CGM.getTargetData().getTypeAllocSize(SrcSTy))
|
|
|
|
return SrcPtr;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// GEP into the first element.
|
|
|
|
SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcPtr, 0, 0, "coerce.dive");
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
// If the first element is a struct, recurse.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy =
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy))
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for a future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
return EnterStructPointerForCoercedAccess(SrcPtr, SrcSTy, DstSize, CGF);
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
|
|
|
|
return SrcPtr;
|
|
|
|
}
|
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
/// CoerceIntOrPtrToIntOrPtr - Convert a value Val to the specific Ty where both
|
|
|
|
/// are either integers or pointers. This does a truncation of the value if it
|
|
|
|
/// is too large or a zero extension if it is too small.
|
|
|
|
static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *Ty,
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
CodeGenFunction &CGF) {
|
|
|
|
if (Val->getType() == Ty)
|
|
|
|
return Val;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(Val->getType())) {
|
|
|
|
// If this is Pointer->Pointer avoid conversion to and from int.
|
|
|
|
if (isa<llvm::PointerType>(Ty))
|
|
|
|
return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// Convert the pointer to an integer so we can play with its width.
|
2010-06-27 15:15:29 +08:00
|
|
|
Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *DestIntTy = Ty;
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(DestIntTy))
|
2010-06-27 15:15:29 +08:00
|
|
|
DestIntTy = CGF.IntPtrTy;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (Val->getType() != DestIntTy)
|
|
|
|
Val = CGF.Builder.CreateIntCast(Val, DestIntTy, false, "coerce.val.ii");
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
if (isa<llvm::PointerType>(Ty))
|
|
|
|
Val = CGF.Builder.CreateIntToPtr(Val, Ty, "coerce.val.ip");
|
|
|
|
return Val;
|
|
|
|
}
|
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
|
|
|
|
|
2009-02-03 03:06:38 +08:00
|
|
|
/// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
|
|
|
|
/// a pointer to an object of type \arg Ty.
|
|
|
|
///
|
|
|
|
/// This safely handles the case when the src type is smaller than the
|
|
|
|
/// destination type; in this situation the values of bits which not
|
|
|
|
/// present in the src are undefined.
|
|
|
|
static llvm::Value *CreateCoercedLoad(llvm::Value *SrcPtr,
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *Ty,
|
2009-02-03 03:06:38 +08:00
|
|
|
CodeGenFunction &CGF) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy =
|
2009-02-03 03:06:38 +08:00
|
|
|
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-06-29 06:51:39 +08:00
|
|
|
// If SrcTy and Ty are the same, just do a load.
|
|
|
|
if (SrcTy == Ty)
|
|
|
|
return CGF.Builder.CreateLoad(SrcPtr);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-05-09 15:08:47 +08:00
|
|
|
uint64_t DstSize = CGF.CGM.getTargetData().getTypeAllocSize(Ty);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
SrcPtr = EnterStructPointerForCoercedAccess(SrcPtr, SrcSTy, DstSize, CGF);
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
SrcTy = cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
use to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
llvm-svn: 106973
2010-06-27 13:56:15 +08:00
|
|
|
uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
|
2009-02-03 03:06:38 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// If the source and destination are integer or pointer types, just do an
|
|
|
|
// extension or truncation to the desired type.
|
|
|
|
if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) &&
|
|
|
|
(isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) {
|
|
|
|
llvm::LoadInst *Load = CGF.Builder.CreateLoad(SrcPtr);
|
|
|
|
return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF);
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-02-03 13:59:18 +08:00
|
|
|
// If load is legal, just bitcast the src pointer.
|
2009-05-14 02:54:26 +08:00
|
|
|
if (SrcSize >= DstSize) {
|
2009-05-16 15:57:57 +08:00
|
|
|
// Generally SrcSize is never greater than DstSize, since this means we are
|
|
|
|
// losing bits. However, this can happen in cases where the structure has
|
|
|
|
// additional padding, for example due to a user specified alignment.
|
2009-05-14 02:54:26 +08:00
|
|
|
//
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: Assert that we aren't truncating non-padding bits when have access
|
|
|
|
// to that information.
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Casted =
|
|
|
|
CGF.Builder.CreateBitCast(SrcPtr, llvm::PointerType::getUnqual(Ty));
|
2009-02-07 10:46:03 +08:00
|
|
|
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted);
|
|
|
|
// FIXME: Use better alignment / avoid requiring aligned load.
|
|
|
|
Load->setAlignment(1);
|
|
|
|
return Load;
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
// Otherwise do coercion through memory. This is stupid, but
|
|
|
|
// simple.
|
|
|
|
llvm::Value *Tmp = CGF.CreateTempAlloca(Ty);
|
|
|
|
llvm::Value *Casted =
|
|
|
|
CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(SrcTy));
|
|
|
|
llvm::StoreInst *Store =
|
|
|
|
CGF.Builder.CreateStore(CGF.Builder.CreateLoad(SrcPtr), Casted);
|
|
|
|
// FIXME: Use better alignment / avoid requiring aligned store.
|
|
|
|
Store->setAlignment(1);
|
|
|
|
return CGF.Builder.CreateLoad(Tmp);
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
|
|
|
|
2011-05-18 05:08:01 +08:00
|
|
|
// Function to store a first-class aggregate into memory. We prefer to
|
|
|
|
// store the elements rather than the aggregate to be more friendly to
|
|
|
|
// fast-isel.
|
|
|
|
// FIXME: Do we need to recurse here?
|
|
|
|
static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val,
|
|
|
|
llvm::Value *DestPtr, bool DestIsVolatile,
|
|
|
|
bool LowAlignment) {
|
|
|
|
// Prefer scalar stores to first-class aggregate stores.
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *STy =
|
2011-05-18 05:08:01 +08:00
|
|
|
dyn_cast<llvm::StructType>(Val->getType())) {
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(DestPtr, 0, i);
|
|
|
|
llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
|
|
|
|
llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr,
|
|
|
|
DestIsVolatile);
|
|
|
|
if (LowAlignment)
|
|
|
|
SI->setAlignment(1);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
CGF.Builder.CreateStore(Val, DestPtr, DestIsVolatile);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-02-03 03:06:38 +08:00
|
|
|
/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
|
|
|
|
/// where the source and destination may have different types.
|
|
|
|
///
|
|
|
|
/// This safely handles the case when the src type is larger than the
|
|
|
|
/// destination type; the upper bits of the src will be lost.
|
|
|
|
static void CreateCoercedStore(llvm::Value *Src,
|
|
|
|
llvm::Value *DstPtr,
|
2009-12-25 04:40:36 +08:00
|
|
|
bool DstIsVolatile,
|
2009-02-03 03:06:38 +08:00
|
|
|
CodeGenFunction &CGF) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *SrcTy = Src->getType();
|
|
|
|
llvm::Type *DstTy =
|
2009-02-03 03:06:38 +08:00
|
|
|
cast<llvm::PointerType>(DstPtr->getType())->getElementType();
|
2010-06-29 06:51:39 +08:00
|
|
|
if (SrcTy == DstTy) {
|
|
|
|
CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
|
|
|
|
return;
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-06-29 06:51:39 +08:00
|
|
|
uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
|
Same patch as the previous on the store side. Before we compiled this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
to:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%2 = load %struct.DeclGroup* %1, align 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %2, %struct.DeclGroup* %D
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
which caused fast isel bailouts due to the FCA load/store of %2. Now
we generate this just blissful code:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
This avoids fastisel bailing out and is groundwork for future patch.
This reduces bailouts on CGStmt.ll to 911 from 935.
llvm-svn: 106974
2010-06-27 14:04:18 +08:00
|
|
|
DstPtr = EnterStructPointerForCoercedAccess(DstPtr, DstSTy, SrcSize, CGF);
|
|
|
|
DstTy = cast<llvm::PointerType>(DstPtr->getType())->getElementType();
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
If coercing something from int or pointer type to int or pointer type
(potentially after unwrapping it from a struct) do it without going through
memory. We now compile:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D) {
return D.NumDecls;
}
into:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%coerce.val.ii = trunc i64 %0 to i32 ; <i32> [#uses=1]
store i32 %coerce.val.ii, i32* %coerce.dive
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
instead of:
%struct.DeclGroup = type { i32 }
define i32 @_Z3foo9DeclGroup(i64) nounwind ssp noredzone {
entry:
%D = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp = alloca i64 ; <i64*> [#uses=2]
%coerce.dive = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
store i64 %0, i64* %tmp
%1 = bitcast i64* %tmp to i32* ; <i32*> [#uses=1]
%2 = load i32* %1, align 1 ; <i32> [#uses=1]
store i32 %2, i32* %coerce.dive
%tmp1 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
ret i32 %tmp2
}
... which is quite a bit less terrifying.
llvm-svn: 106975
2010-06-27 14:26:04 +08:00
|
|
|
// If the source and destination are integer or pointer types, just do an
|
|
|
|
// extension or truncation to the desired type.
|
|
|
|
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
|
|
|
|
(isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
|
|
|
|
Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
|
|
|
|
CGF.Builder.CreateStore(Src, DstPtr, DstIsVolatile);
|
|
|
|
return;
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-05-09 15:08:47 +08:00
|
|
|
uint64_t DstSize = CGF.CGM.getTargetData().getTypeAllocSize(DstTy);
|
2009-02-03 03:06:38 +08:00
|
|
|
|
2009-02-03 13:31:23 +08:00
|
|
|
// If store is legal, just bitcast the src pointer.
|
2009-06-05 15:58:54 +08:00
|
|
|
if (SrcSize <= DstSize) {
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Casted =
|
|
|
|
CGF.Builder.CreateBitCast(DstPtr, llvm::PointerType::getUnqual(SrcTy));
|
2009-02-07 10:46:03 +08:00
|
|
|
// FIXME: Use better alignment / avoid requiring aligned store.
|
2011-05-18 05:08:01 +08:00
|
|
|
BuildAggStore(CGF, Src, Casted, DstIsVolatile, true);
|
2009-02-03 03:06:38 +08:00
|
|
|
} else {
|
|
|
|
// Otherwise do coercion through memory. This is stupid, but
|
|
|
|
// simple.
|
2009-06-05 15:58:54 +08:00
|
|
|
|
|
|
|
// Generally SrcSize is never greater than DstSize, since this means we are
|
|
|
|
// losing bits. However, this can happen in cases where the structure has
|
|
|
|
// additional padding, for example due to a user specified alignment.
|
|
|
|
//
|
|
|
|
// FIXME: Assert that we aren't truncating non-padding bits when have access
|
|
|
|
// to that information.
|
2009-02-03 03:06:38 +08:00
|
|
|
llvm::Value *Tmp = CGF.CreateTempAlloca(SrcTy);
|
|
|
|
CGF.Builder.CreateStore(Src, Tmp);
|
2009-09-09 23:08:12 +08:00
|
|
|
llvm::Value *Casted =
|
2009-02-03 03:06:38 +08:00
|
|
|
CGF.Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(DstTy));
|
2009-02-07 10:46:03 +08:00
|
|
|
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Casted);
|
|
|
|
// FIXME: Use better alignment / avoid requiring aligned load.
|
|
|
|
Load->setAlignment(1);
|
2009-12-25 04:40:36 +08:00
|
|
|
CGF.Builder.CreateStore(Load, DstPtr, DstIsVolatile);
|
2009-02-03 03:06:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
/***/
|
|
|
|
|
2010-07-15 07:39:36 +08:00
|
|
|
bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
|
2009-02-05 16:00:50 +08:00
|
|
|
return FI.getReturnInfo().isIndirect();
|
2009-02-03 05:43:58 +08:00
|
|
|
}
|
|
|
|
|
2010-07-15 07:39:36 +08:00
|
|
|
/// Ask the target whether a result of type \p ResultType uses the Objective-C
/// "fpret" convention.  Only the builtin float, double and long double types
/// are forwarded to the target; every other type yields false.
bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
  const BuiltinType *Builtin = ResultType->getAs<BuiltinType>();
  if (!Builtin)
    return false;

  const TargetInfo &Target = getContext().getTargetInfo();
  switch (Builtin->getKind()) {
  case BuiltinType::Float:
    return Target.useObjCFPRetForRealType(TargetInfo::Float);
  case BuiltinType::Double:
    return Target.useObjCFPRetForRealType(TargetInfo::Double);
  case BuiltinType::LongDouble:
    return Target.useObjCFPRetForRealType(TargetInfo::LongDouble);
  default:
    break;
  }

  return false;
}
|
|
|
|
|
2011-11-01 00:27:11 +08:00
|
|
|
/// Ask the target whether a result of type \p ResultType uses the Objective-C
/// "fp2ret" convention.  Only a complex type whose element type is the builtin
/// long double is forwarded to the target; every other type yields false.
bool CodeGenModule::ReturnTypeUsesFP2Ret(QualType ResultType) {
  const ComplexType *Complex = ResultType->getAs<ComplexType>();
  if (!Complex)
    return false;

  const BuiltinType *Element =
    Complex->getElementType()->getAs<BuiltinType>();
  if (!Element || Element->getKind() != BuiltinType::LongDouble)
    return false;

  return getContext().getTargetInfo().useObjCFP2RetForComplexLongDouble();
}
|
|
|
|
|
2011-07-10 01:41:47 +08:00
|
|
|
/// Arrange the function info for the declaration \p GD and convert it to an
/// LLVM function type.
llvm::FunctionType *CodeGenTypes::GetFunctionType(GlobalDecl GD) {
  return GetFunctionType(arrangeGlobalDeclaration(GD));
}
|
|
|
|
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::FunctionType *
|
2012-02-17 11:33:10 +08:00
|
|
|
CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
|
2011-07-15 13:16:14 +08:00
|
|
|
|
|
|
|
bool Inserted = FunctionsBeingProcessed.insert(&FI); (void)Inserted;
|
|
|
|
assert(Inserted && "Recursively being processed?");
|
|
|
|
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Type*, 8> argTypes;
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *resultType = 0;
|
2008-09-10 12:01:49 +08:00
|
|
|
|
2011-05-15 10:19:42 +08:00
|
|
|
const ABIArgInfo &retAI = FI.getReturnInfo();
|
|
|
|
switch (retAI.getKind()) {
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-05-15 10:19:42 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2011-05-15 10:19:42 +08:00
|
|
|
resultType = retAI.getCoerceToType();
|
2009-02-03 14:17:37 +08:00
|
|
|
break;
|
|
|
|
|
2009-02-05 16:00:50 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
2011-05-15 10:19:42 +08:00
|
|
|
assert(!retAI.getIndirectAlign() && "Align unused on indirect return.");
|
|
|
|
resultType = llvm::Type::getVoidTy(getLLVMContext());
|
|
|
|
|
|
|
|
QualType ret = FI.getReturnType();
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *ty = ConvertType(ret);
|
2011-05-15 10:19:42 +08:00
|
|
|
unsigned addressSpace = Context.getTargetAddressSpace(ret);
|
|
|
|
argTypes.push_back(llvm::PointerType::get(ty, addressSpace));
|
2008-09-10 12:01:49 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2011-05-15 10:19:42 +08:00
|
|
|
resultType = llvm::Type::getVoidTy(getLLVMContext());
|
2009-01-27 05:26:08 +08:00
|
|
|
break;
|
2008-09-10 12:01:49 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
|
|
|
for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
|
2009-02-03 13:31:23 +08:00
|
|
|
ie = FI.arg_end(); it != ie; ++it) {
|
2011-05-15 10:19:42 +08:00
|
|
|
const ABIArgInfo &argAI = it->info;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-05-15 10:19:42 +08:00
|
|
|
switch (argAI.getKind()) {
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
break;
|
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
|
|
|
// indirect arguments are always on the stack, which is addr space #0.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *LTy = ConvertTypeForMem(it->type);
|
2011-05-15 10:19:42 +08:00
|
|
|
argTypes.push_back(LTy->getPointerTo());
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case ABIArgInfo::Extend:
|
2010-07-29 14:44:09 +08:00
|
|
|
case ABIArgInfo::Direct: {
|
2012-01-07 08:25:33 +08:00
|
|
|
// Insert a padding type to ensure proper alignment.
|
|
|
|
if (llvm::Type *PaddingType = argAI.getPaddingType())
|
|
|
|
argTypes.push_back(PaddingType);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
// If the coerce-to type is a first class aggregate, flatten it. Either
|
|
|
|
// way is semantically identical, but fast-isel and the optimizer
|
|
|
|
// generally likes scalar values better than FCAs.
|
2011-07-10 01:41:47 +08:00
|
|
|
llvm::Type *argType = argAI.getCoerceToType();
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) {
|
2011-05-15 10:19:42 +08:00
|
|
|
for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
|
|
|
|
argTypes.push_back(st->getElementType(i));
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
} else {
|
2011-05-15 10:19:42 +08:00
|
|
|
argTypes.push_back(argType);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2009-02-04 03:12:28 +08:00
|
|
|
break;
|
2010-07-29 14:44:09 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-07-10 01:41:47 +08:00
|
|
|
GetExpandedTypes(it->type, argTypes);
|
2008-09-11 09:48:57 +08:00
|
|
|
break;
|
|
|
|
}
|
2008-09-10 12:01:49 +08:00
|
|
|
}
|
|
|
|
|
2011-07-15 13:16:14 +08:00
|
|
|
bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
|
|
|
|
assert(Erased && "Not in set?");
|
|
|
|
|
2012-02-17 11:33:10 +08:00
|
|
|
return llvm::FunctionType::get(resultType, argTypes, FI.isVariadic());
|
2008-09-10 07:48:28 +08:00
|
|
|
}
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
/// Compute the LLVM type to use for the C++ method \p GD.  When the method's
/// prototype is not convertible, an empty struct type is returned instead of
/// a function type.
llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
  const CXXMethodDecl *Method = cast<CXXMethodDecl>(GD.getDecl());
  const FunctionProtoType *Proto =
    Method->getType()->getAs<FunctionProtoType>();

  if (!isFuncTypeConvertible(Proto))
    return llvm::StructType::get(getLLVMContext());

  // Destructors are arranged together with the destructor variant carried by
  // GD; every other method uses the plain method arrangement.
  if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(Method))
    return GetFunctionType(arrangeCXXDestructor(Dtor, GD.getDtorType()));

  return GetFunctionType(arrangeCXXMethodDeclaration(Method));
}
|
|
|
|
|
2009-02-03 07:43:58 +08:00
|
|
|
void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
|
2009-02-03 06:03:45 +08:00
|
|
|
const Decl *TargetDecl,
|
2010-10-19 14:39:39 +08:00
|
|
|
AttributeListType &PAL,
|
2009-09-12 08:59:20 +08:00
|
|
|
unsigned &CallingConv) {
|
2012-01-21 01:57:16 +08:00
|
|
|
llvm::Attributes FuncAttrs;
|
|
|
|
llvm::Attributes RetAttrs;
|
2008-09-10 08:32:18 +08:00
|
|
|
|
2009-09-12 08:59:20 +08:00
|
|
|
CallingConv = FI.getEffectiveCallingConvention();
|
|
|
|
|
2010-02-06 05:31:56 +08:00
|
|
|
if (FI.isNoReturn())
|
|
|
|
FuncAttrs |= llvm::Attribute::NoReturn;
|
|
|
|
|
2009-04-04 08:49:24 +08:00
|
|
|
// FIXME: handle sseregparm someday...
|
2008-09-10 08:32:18 +08:00
|
|
|
if (TargetDecl) {
|
2011-10-13 03:51:18 +08:00
|
|
|
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
|
|
|
|
FuncAttrs |= llvm::Attribute::ReturnsTwice;
|
2009-06-30 10:34:44 +08:00
|
|
|
if (TargetDecl->hasAttr<NoThrowAttr>())
|
2008-09-26 05:02:23 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoUnwind;
|
2010-07-08 14:48:12 +08:00
|
|
|
else if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
|
|
|
|
const FunctionProtoType *FPT = Fn->getType()->getAs<FunctionProtoType>();
|
2011-03-14 01:09:40 +08:00
|
|
|
if (FPT && FPT->isNothrow(getContext()))
|
2010-07-08 14:48:12 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoUnwind;
|
|
|
|
}
|
|
|
|
|
2009-06-30 10:34:44 +08:00
|
|
|
if (TargetDecl->hasAttr<NoReturnAttr>())
|
2008-09-26 05:02:23 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoReturn;
|
2011-08-16 06:38:22 +08:00
|
|
|
|
2011-10-03 22:59:42 +08:00
|
|
|
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
|
|
|
|
FuncAttrs |= llvm::Attribute::ReturnsTwice;
|
|
|
|
|
2011-08-16 06:38:22 +08:00
|
|
|
// 'const' and 'pure' attribute functions are also nounwind.
|
|
|
|
if (TargetDecl->hasAttr<ConstAttr>()) {
|
2008-10-06 07:32:53 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::ReadNone;
|
2011-08-16 06:38:22 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoUnwind;
|
|
|
|
} else if (TargetDecl->hasAttr<PureAttr>()) {
|
2009-04-11 06:14:52 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::ReadOnly;
|
2011-08-16 06:38:22 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoUnwind;
|
|
|
|
}
|
2009-08-10 04:07:29 +08:00
|
|
|
if (TargetDecl->hasAttr<MallocAttr>())
|
|
|
|
RetAttrs |= llvm::Attribute::NoAlias;
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
|
|
|
|
2009-11-13 01:24:48 +08:00
|
|
|
if (CodeGenOpts.OptimizeSize)
|
2009-10-28 03:48:08 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::OptimizeForSize;
|
2009-11-13 01:24:48 +08:00
|
|
|
if (CodeGenOpts.DisableRedZone)
|
2009-06-05 07:32:02 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoRedZone;
|
2009-11-13 01:24:48 +08:00
|
|
|
if (CodeGenOpts.NoImplicitFloat)
|
2009-06-06 06:05:48 +08:00
|
|
|
FuncAttrs |= llvm::Attribute::NoImplicitFloat;
|
2009-06-05 07:32:02 +08:00
|
|
|
|
2009-02-03 07:43:58 +08:00
|
|
|
QualType RetTy = FI.getReturnType();
|
2008-09-10 08:32:18 +08:00
|
|
|
unsigned Index = 1;
|
2009-02-03 13:59:18 +08:00
|
|
|
const ABIArgInfo &RetAI = FI.getReturnInfo();
|
2008-09-10 12:01:49 +08:00
|
|
|
switch (RetAI.getKind()) {
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2010-07-29 07:46:15 +08:00
|
|
|
if (RetTy->hasSignedIntegerRepresentation())
|
2009-06-06 17:36:29 +08:00
|
|
|
RetAttrs |= llvm::Attribute::SExt;
|
2010-07-29 07:46:15 +08:00
|
|
|
else if (RetTy->hasUnsignedIntegerRepresentation())
|
2009-06-06 17:36:29 +08:00
|
|
|
RetAttrs |= llvm::Attribute::ZExt;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct:
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2008-09-10 10:41:04 +08:00
|
|
|
break;
|
|
|
|
|
2009-02-05 16:00:50 +08:00
|
|
|
case ABIArgInfo::Indirect:
|
2009-09-09 23:08:12 +08:00
|
|
|
PAL.push_back(llvm::AttributeWithIndex::get(Index,
|
2010-04-20 13:44:43 +08:00
|
|
|
llvm::Attribute::StructRet));
|
2008-09-10 08:32:18 +08:00
|
|
|
++Index;
|
2009-03-19 03:51:01 +08:00
|
|
|
// sret disables readnone and readonly
|
|
|
|
FuncAttrs &= ~(llvm::Attribute::ReadOnly |
|
|
|
|
llvm::Attribute::ReadNone);
|
2008-09-10 10:41:04 +08:00
|
|
|
break;
|
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2008-09-27 06:53:57 +08:00
|
|
|
if (RetAttrs)
|
|
|
|
PAL.push_back(llvm::AttributeWithIndex::get(0, RetAttrs));
|
2009-04-04 08:49:24 +08:00
|
|
|
|
2011-02-10 01:54:19 +08:00
|
|
|
// FIXME: RegParm should be reduced in case of global register variable.
|
2011-04-09 16:18:08 +08:00
|
|
|
signed RegParm;
|
|
|
|
if (FI.getHasRegParm())
|
|
|
|
RegParm = FI.getRegParm();
|
|
|
|
else
|
2011-02-10 01:54:19 +08:00
|
|
|
RegParm = CodeGenOpts.NumRegisterParameters;
|
2009-04-04 08:49:24 +08:00
|
|
|
|
2011-09-02 08:18:52 +08:00
|
|
|
unsigned PointerWidth = getContext().getTargetInfo().getPointerWidth(0);
|
2009-09-09 23:08:12 +08:00
|
|
|
for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
|
2009-02-03 13:31:23 +08:00
|
|
|
ie = FI.arg_end(); it != ie; ++it) {
|
|
|
|
QualType ParamType = it->type;
|
|
|
|
const ABIArgInfo &AI = it->info;
|
2012-01-21 01:57:16 +08:00
|
|
|
llvm::Attributes Attrs;
|
2009-04-04 08:49:24 +08:00
|
|
|
|
2010-03-27 08:47:27 +08:00
|
|
|
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
|
|
|
|
// have the corresponding parameter variable. It doesn't make
|
2011-02-11 02:10:07 +08:00
|
|
|
// sense to do it here because parameters are so messed up.
|
2008-09-11 09:48:57 +08:00
|
|
|
switch (AI.getKind()) {
|
2009-06-06 17:36:29 +08:00
|
|
|
case ABIArgInfo::Extend:
|
2011-05-21 00:38:50 +08:00
|
|
|
if (ParamType->isSignedIntegerOrEnumerationType())
|
2012-01-21 01:57:16 +08:00
|
|
|
Attrs |= llvm::Attribute::SExt;
|
2011-05-21 00:38:50 +08:00
|
|
|
else if (ParamType->isUnsignedIntegerOrEnumerationType())
|
2012-01-21 01:57:16 +08:00
|
|
|
Attrs |= llvm::Attribute::ZExt;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// FALL THROUGH
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2009-04-04 08:49:24 +08:00
|
|
|
if (RegParm > 0 &&
|
2011-11-28 02:35:39 +08:00
|
|
|
(ParamType->isIntegerType() || ParamType->isPointerType() ||
|
|
|
|
ParamType->isReferenceType())) {
|
2009-04-04 08:49:24 +08:00
|
|
|
RegParm -=
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
(Context.getTypeSize(ParamType) + PointerWidth - 1) / PointerWidth;
|
2009-04-04 08:49:24 +08:00
|
|
|
if (RegParm >= 0)
|
2012-01-21 01:57:16 +08:00
|
|
|
Attrs |= llvm::Attribute::InReg;
|
2009-04-04 08:49:24 +08:00
|
|
|
}
|
|
|
|
// FIXME: handle sseregparm someday...
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-01-07 08:25:33 +08:00
|
|
|
// Increment Index if there is padding.
|
|
|
|
Index += (AI.getPaddingType() != 0);
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *STy =
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
dyn_cast<llvm::StructType>(AI.getCoerceToType()))
|
|
|
|
Index += STy->getNumElements()-1; // 1 will be added below.
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ABIArgInfo::Indirect:
|
|
|
|
if (AI.getIndirectByVal())
|
2012-01-21 01:57:16 +08:00
|
|
|
Attrs |= llvm::Attribute::ByVal;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
2012-01-21 01:57:16 +08:00
|
|
|
Attrs |=
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
llvm::Attribute::constructAlignmentFromInt(AI.getIndirectAlign());
|
|
|
|
// byval disables readnone and readonly.
|
|
|
|
FuncAttrs &= ~(llvm::Attribute::ReadOnly |
|
|
|
|
llvm::Attribute::ReadNone);
|
2008-09-11 09:48:57 +08:00
|
|
|
break;
|
2009-04-04 08:49:24 +08:00
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
// Skip increment, no matching LLVM parameter.
|
2009-09-09 23:08:12 +08:00
|
|
|
continue;
|
2009-01-27 05:26:08 +08:00
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
case ABIArgInfo::Expand: {
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Type*, 8> types;
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: This is rather inefficient. Do we ever actually need to do
|
|
|
|
// anything here? The result should be just reconstructed on the other
|
|
|
|
// side, so extension should be a non-issue.
|
2011-07-10 01:41:47 +08:00
|
|
|
getTypes().GetExpandedTypes(ParamType, types);
|
2011-05-15 10:19:42 +08:00
|
|
|
Index += types.size();
|
2008-09-17 08:51:38 +08:00
|
|
|
continue;
|
|
|
|
}
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-01-21 01:57:16 +08:00
|
|
|
if (Attrs)
|
|
|
|
PAL.push_back(llvm::AttributeWithIndex::get(Index, Attrs));
|
2008-09-17 08:51:38 +08:00
|
|
|
++Index;
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
2008-09-27 06:53:57 +08:00
|
|
|
if (FuncAttrs)
|
|
|
|
PAL.push_back(llvm::AttributeWithIndex::get(~0, FuncAttrs));
|
2008-09-10 08:32:18 +08:00
|
|
|
}
|
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
/// An argument came in as a promoted argument; demote it back to its
|
|
|
|
/// declared type.
|
|
|
|
static llvm::Value *emitArgumentDemotion(CodeGenFunction &CGF,
|
|
|
|
const VarDecl *var,
|
|
|
|
llvm::Value *value) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *varType = CGF.ConvertType(var->getType());
|
2011-03-09 12:27:21 +08:00
|
|
|
|
|
|
|
// This can happen with promotions that actually don't change the
|
|
|
|
// underlying type, like the enum promotions.
|
|
|
|
if (value->getType() == varType) return value;
|
|
|
|
|
|
|
|
assert((varType->isIntegerTy() || varType->isFloatingPointTy())
|
|
|
|
&& "unexpected promotion type");
|
|
|
|
|
|
|
|
if (isa<llvm::IntegerType>(varType))
|
|
|
|
return CGF.Builder.CreateTrunc(value, varType, "arg.unpromote");
|
|
|
|
|
|
|
|
return CGF.Builder.CreateFPCast(value, varType, "arg.unpromote");
|
|
|
|
}
|
|
|
|
|
2009-02-03 06:03:45 +08:00
|
|
|
void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
|
|
|
llvm::Function *Fn,
|
2008-09-10 07:27:19 +08:00
|
|
|
const FunctionArgList &Args) {
|
2009-07-28 09:00:58 +08:00
|
|
|
// If this is an implicit-return-zero function, go ahead and
|
|
|
|
// initialize the return value. TODO: it might be nice to have
|
|
|
|
// a more general mechanism for this that didn't require synthesized
|
|
|
|
// return statements.
|
2010-07-06 04:21:00 +08:00
|
|
|
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
|
2009-07-28 09:00:58 +08:00
|
|
|
if (FD->hasImplicitReturnZero()) {
|
|
|
|
QualType RetTy = FD->getResultType().getUnqualifiedType();
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy);
|
2009-08-01 04:28:54 +08:00
|
|
|
llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy);
|
2009-07-28 09:00:58 +08:00
|
|
|
Builder.CreateStore(Zero, ReturnValue);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: We no longer need the types from FunctionArgList; lift up and
|
|
|
|
// simplify.
|
2009-02-03 14:02:10 +08:00
|
|
|
|
2008-09-10 07:27:19 +08:00
|
|
|
// Emit allocs for param decls. Give the LLVM Argument nodes names.
|
|
|
|
llvm::Function::arg_iterator AI = Fn->arg_begin();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-09-10 07:27:19 +08:00
|
|
|
// Name the struct return argument.
|
2010-07-15 07:39:36 +08:00
|
|
|
if (CGM.ReturnTypeUsesSRet(FI)) {
|
2008-09-10 07:27:19 +08:00
|
|
|
AI->setName("agg.result");
|
2011-08-26 07:04:34 +08:00
|
|
|
AI->addAttr(llvm::Attribute::NoAlias);
|
2008-09-10 07:27:19 +08:00
|
|
|
++AI;
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-02-05 05:17:21 +08:00
|
|
|
assert(FI.arg_size() == Args.size() &&
|
|
|
|
"Mismatch between function signature & arguments.");
|
2011-03-04 04:13:15 +08:00
|
|
|
unsigned ArgNo = 1;
|
2009-02-03 13:59:18 +08:00
|
|
|
CGFunctionInfo::const_arg_iterator info_it = FI.arg_begin();
|
2011-03-04 04:13:15 +08:00
|
|
|
for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
|
|
|
|
i != e; ++i, ++info_it, ++ArgNo) {
|
2011-03-09 12:27:21 +08:00
|
|
|
const VarDecl *Arg = *i;
|
2009-02-03 13:59:18 +08:00
|
|
|
QualType Ty = info_it->type;
|
|
|
|
const ABIArgInfo &ArgI = info_it->info;
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
bool isPromoted =
|
|
|
|
isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
|
|
|
|
|
2008-09-11 09:48:57 +08:00
|
|
|
switch (ArgI.getKind()) {
|
2009-02-05 17:16:39 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
llvm::Value *V = AI;
|
2010-09-17 04:42:02 +08:00
|
|
|
|
2009-02-05 17:16:39 +08:00
|
|
|
if (hasAggregateLLVMType(Ty)) {
|
2010-09-17 04:42:02 +08:00
|
|
|
// Aggregates and complex variables are accessed by reference. All we
|
|
|
|
// need to do is realign the value, if requested
|
|
|
|
if (ArgI.getIndirectRealign()) {
|
|
|
|
llvm::Value *AlignedTemp = CreateMemTemp(Ty, "coerce");
|
|
|
|
|
|
|
|
// Copy from the incoming argument pointer to the temporary with the
|
|
|
|
// appropriate alignment.
|
|
|
|
//
|
|
|
|
// FIXME: We should have a common utility for generating an aggregate
|
|
|
|
// copy.
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
|
2011-01-19 09:58:38 +08:00
|
|
|
CharUnits Size = getContext().getTypeSizeInChars(Ty);
|
2011-03-10 22:02:21 +08:00
|
|
|
llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
|
|
|
|
llvm::Value *Src = Builder.CreateBitCast(V, I8PtrTy);
|
|
|
|
Builder.CreateMemCpy(Dst,
|
|
|
|
Src,
|
2011-01-19 09:58:38 +08:00
|
|
|
llvm::ConstantInt::get(IntPtrTy,
|
|
|
|
Size.getQuantity()),
|
2010-12-30 08:13:21 +08:00
|
|
|
ArgI.getIndirectAlign(),
|
|
|
|
false);
|
2010-09-17 04:42:02 +08:00
|
|
|
V = AlignedTemp;
|
|
|
|
}
|
2009-02-05 17:16:39 +08:00
|
|
|
} else {
|
|
|
|
// Load scalar value from indirect argument.
|
2011-01-19 09:58:38 +08:00
|
|
|
CharUnits Alignment = getContext().getTypeAlignInChars(Ty);
|
|
|
|
V = EmitLoadOfScalar(V, false, Alignment.getQuantity(), Ty);
|
2011-03-09 12:27:21 +08:00
|
|
|
|
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
2009-02-05 17:16:39 +08:00
|
|
|
}
|
2011-03-04 04:13:15 +08:00
|
|
|
EmitParmDecl(*Arg, V, ArgNo);
|
2009-02-05 17:16:39 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-06-06 17:36:29 +08:00
|
|
|
|
|
|
|
case ABIArgInfo::Extend:
|
2009-02-03 14:17:37 +08:00
|
|
|
case ABIArgInfo::Direct: {
|
2012-01-10 03:08:06 +08:00
|
|
|
// Skip the dummy padding argument.
|
|
|
|
if (ArgI.getPaddingType())
|
|
|
|
++AI;
|
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// If we have the trivial case, handle it with no muss and fuss.
|
|
|
|
if (!isa<llvm::StructType>(ArgI.getCoerceToType()) &&
|
2010-07-30 12:02:24 +08:00
|
|
|
ArgI.getCoerceToType() == ConvertType(Ty) &&
|
|
|
|
ArgI.getDirectOffset() == 0) {
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
|
|
|
llvm::Value *V = AI;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-03-27 08:47:27 +08:00
|
|
|
if (Arg->getType().isRestrictQualified())
|
|
|
|
AI->addAttr(llvm::Attribute::NoAlias);
|
|
|
|
|
fix rdar://9780211 - Clang crashes with an assertion failure building WKView.mm from WebKit
This is something of a hack, the problem is as follows:
1. we instantiate both copies of RetainPtr with the two different argument types
(an id and protocol-qualified id).
2. We refer to the ctor of one of the instantiations when introducing global "x",
this causes us to emit an llvm::Function for a prototype whose "this" has type
"RetainPtr<id<bork> >*".
3. We refer to the ctor of the other instantiation when introducing global "y",
however, because it *mangles to the same name as the other ctor* we just use
a bitcasted version of the llvm::Function we previously emitted.
4. We emit deferred declarations, causing us to emit the body of the ctor, however
the body we emit is for RetainPtr<id>, which expects its 'this' to have an IR
type of "RetainPtr<id>*".
Because of the mangling collision, we don't have this case, and explode.
This is really some sort of weird AST invariant violation or something, but hey
a bitcast makes the pain go away.
llvm-svn: 135572
2011-07-20 14:29:00 +08:00
|
|
|
// Ensure the argument is the correct type.
|
|
|
|
if (V->getType() != ArgI.getCoerceToType())
|
|
|
|
V = Builder.CreateBitCast(V, ArgI.getCoerceToType());
|
|
|
|
|
2011-03-09 12:27:21 +08:00
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
fix rdar://9780211 - Clang crashes with an assertion failure building WKView.mm from WebKit
This is something of a hack, the problem is as follows:
1. we instantiate both copies of RetainPtr with the two different argument types
(an id and protocol-qualified id).
2. We refer to the ctor of one of the instantiations when introducing global "x",
this causes us to emit an llvm::Function for a prototype whose "this" has type
"RetainPtr<id<bork> >*".
3. We refer to the ctor of the other instantiation when introducing global "y",
however, because it *mangles to the same name as the other ctor* we just use
a bitcasted version of the llvm::Function we previously emitted.
4. We emit deferred declarations, causing us to emit the body of the ctor, however
the body we emit is for RetainPtr<id>, which expects its 'this' to have an IR
type of "RetainPtr<id>*".
Because of the mangling collision, we don't have this case, and explode.
This is really some sort of weird AST invariant violation or something, but hey
a bitcast makes the pain go away.
llvm-svn: 135572
2011-07-20 14:29:00 +08:00
|
|
|
|
2011-03-04 04:13:15 +08:00
|
|
|
EmitParmDecl(*Arg, V, ArgNo);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-02-10 17:30:15 +08:00
|
|
|
llvm::AllocaInst *Alloca = CreateMemTemp(Ty, Arg->getName());
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-29 02:24:28 +08:00
|
|
|
// The alignment we need to use is the max of the requested alignment for
|
|
|
|
// the argument plus the alignment required by our access code below.
|
2010-10-19 14:39:39 +08:00
|
|
|
unsigned AlignmentToUse =
|
2011-02-08 16:22:06 +08:00
|
|
|
CGM.getTargetData().getABITypeAlignment(ArgI.getCoerceToType());
|
2010-07-29 02:24:28 +08:00
|
|
|
AlignmentToUse = std::max(AlignmentToUse,
|
|
|
|
(unsigned)getContext().getDeclAlign(Arg).getQuantity());
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-29 02:24:28 +08:00
|
|
|
Alloca->setAlignment(AlignmentToUse);
|
2010-07-06 04:21:00 +08:00
|
|
|
llvm::Value *V = Alloca;
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::Value *Ptr = V; // Pointer to store into.
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = ArgI.getDirectOffset()) {
|
|
|
|
Ptr = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy());
|
|
|
|
Ptr = Builder.CreateConstGEP1_32(Ptr, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
Ptr = Builder.CreateBitCast(Ptr,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(ArgI.getCoerceToType()));
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
// If the coerce-to type is a first class aggregate, we flatten it and
|
|
|
|
// pass the elements. Either way is semantically identical, but fast-isel
|
|
|
|
// and the optimizer generally likes scalar values better than FCAs.
|
2012-02-10 17:30:15 +08:00
|
|
|
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
|
|
|
|
if (STy && STy->getNumElements() > 1) {
|
|
|
|
uint64_t SrcSize = CGM.getTargetData().getTypeAllocSize(STy);
|
|
|
|
llvm::Type *DstTy =
|
|
|
|
cast<llvm::PointerType>(Ptr->getType())->getElementType();
|
|
|
|
uint64_t DstSize = CGM.getTargetData().getTypeAllocSize(DstTy);
|
|
|
|
|
|
|
|
if (SrcSize <= DstSize) {
|
|
|
|
Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
|
|
|
AI->setName(Arg->getName() + ".coerce" + Twine(i));
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(Ptr, 0, i);
|
|
|
|
Builder.CreateStore(AI++, EltPtr);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
llvm::AllocaInst *TempAlloca =
|
|
|
|
CreateTempAlloca(ArgI.getCoerceToType(), "coerce");
|
|
|
|
TempAlloca->setAlignment(AlignmentToUse);
|
|
|
|
llvm::Value *TempV = TempAlloca;
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
|
|
|
AI->setName(Arg->getName() + ".coerce" + Twine(i));
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(TempV, 0, i);
|
|
|
|
Builder.CreateStore(AI++, EltPtr);
|
|
|
|
}
|
|
|
|
|
|
|
|
Builder.CreateMemCpy(Ptr, TempV, DstSize, AlignmentToUse);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
|
|
|
} else {
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
// Simple case, just do a coerced store of the argument into the alloca.
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
2010-06-29 08:14:52 +08:00
|
|
|
AI->setName(Arg->getName() + ".coerce");
|
2010-07-30 12:02:24 +08:00
|
|
|
CreateCoercedStore(AI++, Ptr, /*DestIsVolatile=*/false, *this);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
// Match to what EmitParmDecl is expecting for this type.
|
2009-02-04 15:22:24 +08:00
|
|
|
if (!CodeGenFunction::hasAggregateLLVMType(Ty)) {
|
2010-08-21 10:24:36 +08:00
|
|
|
V = EmitLoadOfScalar(V, false, AlignmentToUse, Ty);
|
2011-03-09 12:27:21 +08:00
|
|
|
if (isPromoted)
|
|
|
|
V = emitArgumentDemotion(*this, Arg, V);
|
2009-02-04 15:22:24 +08:00
|
|
|
}
|
2011-03-04 04:13:15 +08:00
|
|
|
EmitParmDecl(*Arg, V, ArgNo);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
continue; // Skip ++AI increment, already done.
|
2009-02-04 03:12:28 +08:00
|
|
|
}
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
|
|
|
case ABIArgInfo::Expand: {
|
|
|
|
// If this structure was expanded into multiple arguments then
|
|
|
|
// we need to create a temporary and reconstruct it from the
|
|
|
|
// arguments.
|
2011-11-04 05:39:02 +08:00
|
|
|
llvm::AllocaInst *Alloca = CreateMemTemp(Ty);
|
2011-12-03 12:14:32 +08:00
|
|
|
CharUnits Align = getContext().getDeclAlign(Arg);
|
|
|
|
Alloca->setAlignment(Align.getQuantity());
|
|
|
|
LValue LV = MakeAddrLValue(Alloca, Ty, Align);
|
2011-11-04 05:39:02 +08:00
|
|
|
llvm::Function::arg_iterator End = ExpandTypeFromArgs(Ty, LV, AI);
|
|
|
|
EmitParmDecl(*Arg, Alloca, ArgNo);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
|
|
|
// Name the arguments used in expansion and increment AI.
|
|
|
|
unsigned Index = 0;
|
|
|
|
for (; AI != End; ++AI, ++Index)
|
2011-07-23 18:55:15 +08:00
|
|
|
AI->setName(Arg->getName() + "." + Twine(Index));
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
// Initialize the local variable appropriately.
|
|
|
|
if (hasAggregateLLVMType(Ty))
|
2011-03-04 04:13:15 +08:00
|
|
|
EmitParmDecl(*Arg, CreateMemTemp(Ty), ArgNo);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
else
|
2011-03-04 04:13:15 +08:00
|
|
|
EmitParmDecl(*Arg, llvm::UndefValue::get(ConvertType(Arg->getType())),
|
|
|
|
ArgNo);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
|
|
|
|
// Skip increment, no matching LLVM parameter.
|
|
|
|
continue;
|
2008-09-11 09:48:57 +08:00
|
|
|
}
|
2008-09-17 08:51:38 +08:00
|
|
|
|
|
|
|
++AI;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
assert(AI == Fn->arg_end() && "Argument mismatch!");
|
|
|
|
}
|
|
|
|
|
2012-01-29 15:46:59 +08:00
|
|
|
/// Erase \p insn if it is a bitcast with no remaining uses, then continue
/// upwards through its operand, erasing for as long as each predecessor is
/// itself an unused bitcast.  The walk stops at the first instruction that
/// still has uses or that is not a bitcast.
static void eraseUnusedBitCasts(llvm::Instruction *insn) {
  for (llvm::Instruction *cur = insn; cur->use_empty(); ) {
    llvm::BitCastInst *deadCast = dyn_cast<llvm::BitCastInst>(cur);
    if (deadCast == 0)
      break;

    // Fetch the source before erasing the cast.  The cast<> is "safe"
    // because a bitcast of a non-instruction would have been emitted as a
    // ConstantExpr instead of a BitCastInst.
    cur = cast<llvm::Instruction>(deadCast->getOperand(0));
    deadCast->eraseFromParent();
  }
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Try to emit a fused autorelease of a return result.
|
|
|
|
static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
|
|
|
// We must be immediately followed the cast.
|
|
|
|
llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock();
|
|
|
|
if (BB->empty()) return 0;
|
|
|
|
if (&BB->back() != result) return 0;
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *resultType = result->getType();
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// result is in a BasicBlock and is therefore an Instruction.
|
|
|
|
llvm::Instruction *generator = cast<llvm::Instruction>(result);
|
|
|
|
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Instruction*,4> insnsToKill;
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// Look for:
|
|
|
|
// %generator = bitcast %type1* %generator2 to %type2*
|
|
|
|
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(generator)) {
|
|
|
|
// We would have emitted this as a constant if the operand weren't
|
|
|
|
// an Instruction.
|
|
|
|
generator = cast<llvm::Instruction>(bitcast->getOperand(0));
|
|
|
|
|
|
|
|
// Require the generator to be immediately followed by the cast.
|
|
|
|
if (generator->getNextNode() != bitcast)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
insnsToKill.push_back(bitcast);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Look for:
|
|
|
|
// %generator = call i8* @objc_retain(i8* %originalResult)
|
|
|
|
// or
|
|
|
|
// %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult)
|
|
|
|
llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator);
|
|
|
|
if (!call) return 0;
|
|
|
|
|
|
|
|
bool doRetainAutorelease;
|
|
|
|
|
|
|
|
if (call->getCalledValue() == CGF.CGM.getARCEntrypoints().objc_retain) {
|
|
|
|
doRetainAutorelease = true;
|
|
|
|
} else if (call->getCalledValue() == CGF.CGM.getARCEntrypoints()
|
|
|
|
.objc_retainAutoreleasedReturnValue) {
|
|
|
|
doRetainAutorelease = false;
|
|
|
|
|
|
|
|
// Look for an inline asm immediately preceding the call and kill it, too.
|
|
|
|
llvm::Instruction *prev = call->getPrevNode();
|
|
|
|
if (llvm::CallInst *asmCall = dyn_cast_or_null<llvm::CallInst>(prev))
|
|
|
|
if (asmCall->getCalledValue()
|
|
|
|
== CGF.CGM.getARCEntrypoints().retainAutoreleasedReturnValueMarker)
|
|
|
|
insnsToKill.push_back(prev);
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
result = call->getArgOperand(0);
|
|
|
|
insnsToKill.push_back(call);
|
|
|
|
|
|
|
|
// Keep killing bitcasts, for sanity. Note that we no longer care
|
|
|
|
// about precise ordering as long as there's exactly one use.
|
|
|
|
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(result)) {
|
|
|
|
if (!bitcast->hasOneUse()) break;
|
|
|
|
insnsToKill.push_back(bitcast);
|
|
|
|
result = bitcast->getOperand(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete all the unnecessary instructions, from latest to earliest.
|
2011-07-23 18:55:15 +08:00
|
|
|
for (SmallVectorImpl<llvm::Instruction*>::iterator
|
2011-06-16 07:02:42 +08:00
|
|
|
i = insnsToKill.begin(), e = insnsToKill.end(); i != e; ++i)
|
|
|
|
(*i)->eraseFromParent();
|
|
|
|
|
|
|
|
// Do the fused retain/autorelease if we were asked to.
|
|
|
|
if (doRetainAutorelease)
|
|
|
|
result = CGF.EmitARCRetainAutoreleaseReturnValue(result);
|
|
|
|
|
|
|
|
// Cast back to the result type.
|
|
|
|
return CGF.Builder.CreateBitCast(result, resultType);
|
|
|
|
}
|
|
|
|
|
2012-01-29 15:46:59 +08:00
|
|
|
/// If this is a +1 of the value of an immutable 'self', remove it.
|
|
|
|
static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
|
|
|
// This is only applicable to a method with an immutable 'self'.
|
|
|
|
const ObjCMethodDecl *method = dyn_cast<ObjCMethodDecl>(CGF.CurCodeDecl);
|
|
|
|
if (!method) return 0;
|
|
|
|
const VarDecl *self = method->getSelfDecl();
|
|
|
|
if (!self->getType().isConstQualified()) return 0;
|
|
|
|
|
|
|
|
// Look for a retain call.
|
|
|
|
llvm::CallInst *retainCall =
|
|
|
|
dyn_cast<llvm::CallInst>(result->stripPointerCasts());
|
|
|
|
if (!retainCall ||
|
|
|
|
retainCall->getCalledValue() != CGF.CGM.getARCEntrypoints().objc_retain)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Look for an ordinary load of 'self'.
|
|
|
|
llvm::Value *retainedValue = retainCall->getArgOperand(0);
|
|
|
|
llvm::LoadInst *load =
|
|
|
|
dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts());
|
|
|
|
if (!load || load->isAtomic() || load->isVolatile() ||
|
|
|
|
load->getPointerOperand() != CGF.GetAddrOfLocalVar(self))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Okay! Burn it all down. This relies for correctness on the
|
|
|
|
// assumption that the retain is emitted as part of the return and
|
|
|
|
// that thereafter everything is used "linearly".
|
|
|
|
llvm::Type *resultType = result->getType();
|
|
|
|
eraseUnusedBitCasts(cast<llvm::Instruction>(result));
|
|
|
|
assert(retainCall->use_empty());
|
|
|
|
retainCall->eraseFromParent();
|
|
|
|
eraseUnusedBitCasts(cast<llvm::Instruction>(retainedValue));
|
|
|
|
|
|
|
|
return CGF.Builder.CreateBitCast(load, resultType);
|
|
|
|
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Emit an ARC autorelease of the result of a function.
|
2012-01-29 15:46:59 +08:00
|
|
|
///
|
|
|
|
/// \return the value to actually return from the function
|
2011-06-16 07:02:42 +08:00
|
|
|
static llvm::Value *emitAutoreleaseOfResult(CodeGenFunction &CGF,
|
|
|
|
llvm::Value *result) {
|
2012-01-29 15:46:59 +08:00
|
|
|
// If we're returning 'self', kill the initial retain. This is a
|
|
|
|
// heuristic attempt to "encourage correctness" in the really unfortunate
|
|
|
|
// case where we have a return of self during a dealloc and we desperately
|
|
|
|
// need to avoid the possible autorelease.
|
|
|
|
if (llvm::Value *self = tryRemoveRetainOfSelf(CGF, result))
|
|
|
|
return self;
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// At -O0, try to emit a fused retain/autorelease.
|
|
|
|
if (CGF.shouldUseFusedARCCalls())
|
|
|
|
if (llvm::Value *fused = tryEmitFusedAutoreleaseOfResult(CGF, result))
|
|
|
|
return fused;
|
|
|
|
|
|
|
|
return CGF.EmitARCAutoreleaseReturnValue(result);
|
|
|
|
}
|
|
|
|
|
2012-01-29 10:35:02 +08:00
|
|
|
/// Heuristically search for a dominating store to the return-value slot.
|
|
|
|
static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
|
|
|
|
// If there are multiple uses of the return-value slot, just check
|
|
|
|
// for something immediately preceding the IP. Sometimes this can
|
|
|
|
// happen with how we generate implicit-returns; it can also happen
|
|
|
|
// with noreturn cleanups.
|
|
|
|
if (!CGF.ReturnValue->hasOneUse()) {
|
|
|
|
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
|
|
|
|
if (IP->empty()) return 0;
|
|
|
|
llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back());
|
|
|
|
if (!store) return 0;
|
|
|
|
if (store->getPointerOperand() != CGF.ReturnValue) return 0;
|
|
|
|
assert(!store->isAtomic() && !store->isVolatile()); // see below
|
|
|
|
return store;
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::StoreInst *store =
|
|
|
|
dyn_cast<llvm::StoreInst>(CGF.ReturnValue->use_back());
|
|
|
|
if (!store) return 0;
|
|
|
|
|
|
|
|
// These aren't actually possible for non-coerced returns, and we
|
|
|
|
// only care about non-coerced returns on this code path.
|
|
|
|
assert(!store->isAtomic() && !store->isVolatile());
|
|
|
|
|
|
|
|
// Now do a first-and-dirty dominance check: just walk up the
|
|
|
|
// single-predecessors chain from the current insertion point.
|
|
|
|
llvm::BasicBlock *StoreBB = store->getParent();
|
|
|
|
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
|
|
|
|
while (IP != StoreBB) {
|
|
|
|
if (!(IP = IP->getSinglePredecessor()))
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Okay, the store's basic block dominates the insertion point; we
|
|
|
|
// can do our thing.
|
|
|
|
return store;
|
|
|
|
}
|
|
|
|
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) {
|
2008-09-10 10:41:04 +08:00
|
|
|
// Functions with no result always return void.
|
2010-06-27 07:13:19 +08:00
|
|
|
if (ReturnValue == 0) {
|
|
|
|
Builder.CreateRetVoid();
|
|
|
|
return;
|
|
|
|
}
|
2010-07-01 05:27:58 +08:00
|
|
|
|
2010-07-21 04:13:52 +08:00
|
|
|
llvm::DebugLoc RetDbgLoc;
|
2010-06-27 07:13:19 +08:00
|
|
|
llvm::Value *RV = 0;
|
|
|
|
QualType RetTy = FI.getReturnType();
|
|
|
|
const ABIArgInfo &RetAI = FI.getReturnInfo();
|
2009-06-06 17:36:29 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
switch (RetAI.getKind()) {
|
2010-08-21 10:24:36 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
|
|
|
unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity();
|
2010-06-27 07:13:19 +08:00
|
|
|
if (RetTy->isAnyComplexType()) {
|
|
|
|
ComplexPairTy RT = LoadComplexFromAddr(ReturnValue, false);
|
|
|
|
StoreComplexToAddr(RT, CurFn->arg_begin(), false);
|
|
|
|
} else if (CodeGenFunction::hasAggregateLLVMType(RetTy)) {
|
|
|
|
// Do nothing; aggregates get evaluated directly into the destination.
|
|
|
|
} else {
|
|
|
|
EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), CurFn->arg_begin(),
|
2010-08-21 10:24:36 +08:00
|
|
|
false, Alignment, RetTy);
|
2010-06-27 07:13:19 +08:00
|
|
|
}
|
|
|
|
break;
|
2010-08-21 10:24:36 +08:00
|
|
|
}
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
case ABIArgInfo::Extend:
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Direct:
|
2010-07-30 12:02:24 +08:00
|
|
|
if (RetAI.getCoerceToType() == ConvertType(RetTy) &&
|
|
|
|
RetAI.getDirectOffset() == 0) {
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// The internal return value temp always will have pointer-to-return-type
|
|
|
|
// type, just do a load.
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2012-01-29 10:35:02 +08:00
|
|
|
// If there is a dominating store to ReturnValue, we can elide
|
|
|
|
// the load, zap the store, and usually zap the alloca.
|
|
|
|
if (llvm::StoreInst *SI = findDominatingStoreToReturnValue(*this)) {
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// Get the stored value and nuke the now-dead store.
|
|
|
|
RetDbgLoc = SI->getDebugLoc();
|
|
|
|
RV = SI->getValueOperand();
|
|
|
|
SI->eraseFromParent();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
// If that was the only use of the return value, nuke it as well now.
|
|
|
|
if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) {
|
|
|
|
cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent();
|
|
|
|
ReturnValue = 0;
|
|
|
|
}
|
2012-01-29 10:35:02 +08:00
|
|
|
|
|
|
|
// Otherwise, we have to do a simple load.
|
|
|
|
} else {
|
|
|
|
RV = Builder.CreateLoad(ReturnValue);
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
}
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
} else {
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::Value *V = ReturnValue;
|
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = RetAI.getDirectOffset()) {
|
|
|
|
V = Builder.CreateBitCast(V, Builder.getInt8PtrTy());
|
|
|
|
V = Builder.CreateConstGEP1_32(V, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
V = Builder.CreateBitCast(V,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
|
Change IR generation for return (in the simple case) to avoid doing silly
load/store nonsense in the epilog. For example, for:
int foo(int X) {
int A[100];
return A[X];
}
we used to generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
%0 = load i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
which codegen'd to this code:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 400(%rsp)
movl 400(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %edi
movl %edi, 404(%rsp)
movl 404(%rsp), %eax
addq $408, %rsp ## imm = 0x198
ret
Now we generate:
%arrayidx = getelementptr inbounds [100 x i32]* %A, i32 0, i64 %idxprom ; <i32*> [#uses=1]
%tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
ret i32 %tmp1
}
and:
_foo: ## @foo
## BB#0: ## %entry
subq $408, %rsp ## imm = 0x198
movl %edi, 404(%rsp)
movl 404(%rsp), %edi
movslq %edi, %rax
movl (%rsp,%rax,4), %eax
addq $408, %rsp ## imm = 0x198
ret
This actually does matter, cutting out 2000 lines of IR from CGStmt.ll
for example.
Another interesting effect is that altivec.h functions which are dead
now get dce'd by the inliner. Hence all the changes to
builtins-ppc-altivec.c to ensure the calls aren't dead.
llvm-svn: 106970
2010-06-27 09:06:27 +08:00
|
|
|
}
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// In ARC, end functions that return a retainable type with a call
|
|
|
|
// to objc_autoreleaseReturnValue.
|
|
|
|
if (AutoreleaseResult) {
|
|
|
|
assert(getLangOptions().ObjCAutoRefCount &&
|
|
|
|
!FI.isReturnsRetained() &&
|
|
|
|
RetTy->isObjCRetainableType());
|
|
|
|
RV = emitAutoreleaseOfResult(*this, RV);
|
|
|
|
}
|
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
break;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2010-06-27 07:13:19 +08:00
|
|
|
break;
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2010-06-27 07:13:19 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-07-01 05:27:58 +08:00
|
|
|
llvm::Instruction *Ret = RV ? Builder.CreateRet(RV) : Builder.CreateRetVoid();
|
2010-07-22 02:08:50 +08:00
|
|
|
if (!RetDbgLoc.isUnknown())
|
|
|
|
Ret->setDebugLoc(RetDbgLoc);
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
/// Append the current value of the parameter \p param to \p args, so the
/// parameter can be forwarded as an argument of another call.
///
/// The parameter lives in a local slot (obtained via GetAddrOfLocalVar);
/// depending on its type that slot is either passed along as an aggregate
/// address or loaded to produce a scalar/complex r-value.
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
                                          const VarDecl *param) {
  // StartFunction converted the ABI-lowered parameter(s) into a local
  // alloca; turn that back into an r-value suitable for EmitCall.
  llvm::Value *Slot = GetAddrOfLocalVar(param);
  QualType ParamTy = param->getType();

  // Mostly this is just a load of the slot, except that aggregate r-values
  // are pointers to temporaries and references to aggregates point directly
  // at the aggregate.
  RValue Forwarded;
  if (const ReferenceType *RefTy = ParamTy->getAs<ReferenceType>()) {
    if (hasAggregateLLVMType(RefTy->getPointeeType())) {
      Forwarded = RValue::getAggregate(Slot);
    } else {
      // A reference to a scalar is stored as a pointer held in the slot.
      Forwarded = RValue::get(Builder.CreateLoad(Slot));
    }
  } else if (ParamTy->isAnyComplexType()) {
    ComplexPairTy Pair = LoadComplexFromAddr(Slot, /*volatile*/ false);
    Forwarded = RValue::getComplex(Pair);
  } else if (hasAggregateLLVMType(ParamTy)) {
    Forwarded = RValue::getAggregate(Slot);
  } else {
    // Plain scalar: load it using the declared alignment of the parameter.
    unsigned DeclAlign = getContext().getDeclAlign(param).getQuantity();
    Forwarded = RValue::get(EmitLoadOfScalar(Slot, false, DeclAlign, ParamTy));
  }

  args.add(Forwarded, ParamTy);
}
|
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
/// Returns true only when \p addr is itself a constant null pointer value.
static bool isProvablyNull(llvm::Value *addr) {
  const bool IsNullConstant = isa<llvm::ConstantPointerNull>(addr);
  return IsNullConstant;
}
|
|
|
|
|
|
|
|
/// Returns true only when \p addr is directly an alloca instruction; any
/// other kind of value is treated as possibly null.
static bool isProvablyNonNull(llvm::Value *addr) {
  const bool IsAlloca = isa<llvm::AllocaInst>(addr);
  return IsAlloca;
}
|
|
|
|
|
|
|
|
/// Emit the actual writing-back of a writeback.
|
|
|
|
static void emitWriteback(CodeGenFunction &CGF,
|
|
|
|
const CallArgList::Writeback &writeback) {
|
|
|
|
llvm::Value *srcAddr = writeback.Address;
|
|
|
|
assert(!isProvablyNull(srcAddr) &&
|
|
|
|
"shouldn't have writeback for provably null argument");
|
|
|
|
|
|
|
|
llvm::BasicBlock *contBB = 0;
|
|
|
|
|
|
|
|
// If the argument wasn't provably non-null, we need to null check
|
|
|
|
// before doing the store.
|
|
|
|
bool provablyNonNull = isProvablyNonNull(srcAddr);
|
|
|
|
if (!provablyNonNull) {
|
|
|
|
llvm::BasicBlock *writebackBB = CGF.createBasicBlock("icr.writeback");
|
|
|
|
contBB = CGF.createBasicBlock("icr.done");
|
|
|
|
|
|
|
|
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
|
|
|
|
CGF.Builder.CreateCondBr(isNull, contBB, writebackBB);
|
|
|
|
CGF.EmitBlock(writebackBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the value to writeback.
|
|
|
|
llvm::Value *value = CGF.Builder.CreateLoad(writeback.Temporary);
|
|
|
|
|
|
|
|
// Cast it back, in case we're writing an id to a Foo* or something.
|
|
|
|
value = CGF.Builder.CreateBitCast(value,
|
|
|
|
cast<llvm::PointerType>(srcAddr->getType())->getElementType(),
|
|
|
|
"icr.writeback-cast");
|
|
|
|
|
|
|
|
// Perform the writeback.
|
|
|
|
QualType srcAddrType = writeback.AddressType;
|
|
|
|
CGF.EmitStoreThroughLValue(RValue::get(value),
|
2011-06-25 10:11:03 +08:00
|
|
|
CGF.MakeAddrLValue(srcAddr, srcAddrType));
|
2011-06-16 07:02:42 +08:00
|
|
|
|
|
|
|
// Jump to the continuation block.
|
|
|
|
if (!provablyNonNull)
|
|
|
|
CGF.EmitBlock(contBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit every writeback recorded on \p args, in the order they were added.
static void emitWritebacks(CodeGenFunction &CGF,
                           const CallArgList &args) {
  CallArgList::writeback_iterator Cur = args.writeback_begin();
  CallArgList::writeback_iterator End = args.writeback_end();
  while (Cur != End) {
    emitWriteback(CGF, *Cur);
    ++Cur;
  }
}
|
|
|
|
|
|
|
|
/// Emit an argument that's being passed call-by-writeback. That is,
|
|
|
|
/// we are passing the address of
|
|
|
|
static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
|
|
|
|
const ObjCIndirectCopyRestoreExpr *CRE) {
|
|
|
|
llvm::Value *srcAddr = CGF.EmitScalarExpr(CRE->getSubExpr());
|
|
|
|
|
|
|
|
// The dest and src types don't necessarily match in LLVM terms
|
|
|
|
// because of the crazy ObjC compatibility rules.
|
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::PointerType *destType =
|
2011-06-16 07:02:42 +08:00
|
|
|
cast<llvm::PointerType>(CGF.ConvertType(CRE->getType()));
|
|
|
|
|
|
|
|
// If the address is a constant null, just pass the appropriate null.
|
|
|
|
if (isProvablyNull(srcAddr)) {
|
|
|
|
args.add(RValue::get(llvm::ConstantPointerNull::get(destType)),
|
|
|
|
CRE->getType());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
QualType srcAddrType =
|
|
|
|
CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType();
|
|
|
|
|
|
|
|
// Create the temporary.
|
|
|
|
llvm::Value *temp = CGF.CreateTempAlloca(destType->getElementType(),
|
|
|
|
"icr.temp");
|
|
|
|
|
|
|
|
// Zero-initialize it if we're not doing a copy-initialization.
|
|
|
|
bool shouldCopy = CRE->shouldCopy();
|
|
|
|
if (!shouldCopy) {
|
|
|
|
llvm::Value *null =
|
|
|
|
llvm::ConstantPointerNull::get(
|
|
|
|
cast<llvm::PointerType>(destType->getElementType()));
|
|
|
|
CGF.Builder.CreateStore(null, temp);
|
|
|
|
}
|
|
|
|
|
|
|
|
llvm::BasicBlock *contBB = 0;
|
|
|
|
|
|
|
|
// If the address is *not* known to be non-null, we need to switch.
|
|
|
|
llvm::Value *finalArgument;
|
|
|
|
|
|
|
|
bool provablyNonNull = isProvablyNonNull(srcAddr);
|
|
|
|
if (provablyNonNull) {
|
|
|
|
finalArgument = temp;
|
|
|
|
} else {
|
|
|
|
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
|
|
|
|
|
|
|
|
finalArgument = CGF.Builder.CreateSelect(isNull,
|
|
|
|
llvm::ConstantPointerNull::get(destType),
|
|
|
|
temp, "icr.argument");
|
|
|
|
|
|
|
|
// If we need to copy, then the load has to be conditional, which
|
|
|
|
// means we need control flow.
|
|
|
|
if (shouldCopy) {
|
|
|
|
contBB = CGF.createBasicBlock("icr.cont");
|
|
|
|
llvm::BasicBlock *copyBB = CGF.createBasicBlock("icr.copy");
|
|
|
|
CGF.Builder.CreateCondBr(isNull, contBB, copyBB);
|
|
|
|
CGF.EmitBlock(copyBB);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Perform a copy if necessary.
|
|
|
|
if (shouldCopy) {
|
|
|
|
LValue srcLV = CGF.MakeAddrLValue(srcAddr, srcAddrType);
|
2011-06-25 10:11:03 +08:00
|
|
|
RValue srcRV = CGF.EmitLoadOfLValue(srcLV);
|
2011-06-16 07:02:42 +08:00
|
|
|
assert(srcRV.isScalar());
|
|
|
|
|
|
|
|
llvm::Value *src = srcRV.getScalarVal();
|
|
|
|
src = CGF.Builder.CreateBitCast(src, destType->getElementType(),
|
|
|
|
"icr.cast");
|
|
|
|
|
|
|
|
// Use an ordinary store, not a store-to-lvalue.
|
|
|
|
CGF.Builder.CreateStore(src, temp);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finish the control flow if we needed it.
|
|
|
|
if (shouldCopy && !provablyNonNull)
|
|
|
|
CGF.EmitBlock(contBB);
|
|
|
|
|
|
|
|
args.addWriteback(srcAddr, srcAddrType, temp);
|
|
|
|
args.add(RValue::get(finalArgument), CRE->getType());
|
|
|
|
}
|
|
|
|
|
2011-03-12 04:59:21 +08:00
|
|
|
/// Evaluate the argument expression \p E for a parameter of type \p type and
/// append the result to \p args.
///
/// Handles, in order: ObjC indirect copy-restore arguments (passed by
/// writeback), gl-value expressions (bound as references), aggregate
/// l-value-to-r-value loads (added directly with the NeedsCopy flag set), and
/// everything else (evaluated into a temporary).
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
                                  QualType type) {
  const ObjCIndirectCopyRestoreExpr *CRE =
    dyn_cast<ObjCIndirectCopyRestoreExpr>(E);
  if (CRE) {
    assert(getContext().getLangOptions().ObjCAutoRefCount);
    assert(getContext().hasSameType(E->getType(), type));
    emitWritebackArg(*this, args, CRE);
    return;
  }

  assert(type->isReferenceType() == E->isGLValue() &&
         "reference binding to unmaterialized r-value!");

  // GL-values are passed by binding a reference to them.
  if (E->isGLValue()) {
    assert(E->getObjectKind() == OK_Ordinary);
    args.add(EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0), type);
    return;
  }

  // An aggregate that is merely being loaded from an l-value can be added
  // straight from that l-value; the entry is flagged NeedsCopy.
  if (hasAggregateLLVMType(type) && !E->getType()->isAnyComplexType()) {
    if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) {
      if (ICE->getCastKind() == CK_LValueToRValue) {
        LValue L = EmitLValue(ICE->getSubExpr());
        assert(L.isSimple());
        args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
        return;
      }
    }
  }

  // General case: evaluate into a temporary.
  args.add(EmitAnyExprToTemp(E), type);
}
|
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
|
|
|
void
|
|
|
|
CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) {
|
|
|
|
if (CGM.getCodeGenOpts().OptimizationLevel != 0 &&
|
|
|
|
!CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
|
|
|
|
Inst->setMetadata("clang.arc.no_objc_arc_exceptions",
|
|
|
|
CGM.getNoObjCARCExceptionsMetadata());
|
|
|
|
}
|
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
/// Emits a call or invoke instruction to the given function, depending
|
|
|
|
/// on the current state of the EH stack.
|
|
|
|
llvm::CallSite
|
|
|
|
CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
|
2011-07-24 01:14:25 +08:00
|
|
|
ArrayRef<llvm::Value *> Args,
|
2011-07-23 18:55:15 +08:00
|
|
|
const Twine &Name) {
|
2010-07-06 09:34:17 +08:00
|
|
|
llvm::BasicBlock *InvokeDest = getInvokeDest();
|
2012-02-16 08:57:37 +08:00
|
|
|
|
|
|
|
llvm::Instruction *Inst;
|
2010-07-06 09:34:17 +08:00
|
|
|
if (!InvokeDest)
|
2012-02-16 08:57:37 +08:00
|
|
|
Inst = Builder.CreateCall(Callee, Args, Name);
|
|
|
|
else {
|
|
|
|
llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont");
|
|
|
|
Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name);
|
|
|
|
EmitBlock(ContBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
|
|
|
if (CGM.getLangOptions().ObjCAutoRefCount)
|
|
|
|
AddObjCARCExceptionMetadata(Inst);
|
2010-07-06 09:34:17 +08:00
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
return Inst;
|
2010-07-06 09:34:17 +08:00
|
|
|
}
|
|
|
|
|
2011-07-15 16:37:34 +08:00
|
|
|
/// Convenience overload: emit a call or invoke of \p Callee with no
/// arguments by forwarding to the ArrayRef overload.
llvm::CallSite
CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
                                  const Twine &Name) {
  ArrayRef<llvm::Value *> NoArgs;
  return EmitCallOrInvoke(Callee, NoArgs, Name);
}
|
|
|
|
|
2011-07-12 12:46:18 +08:00
|
|
|
/// Assert that \p Elt is acceptable as IR argument number \p ArgNo of a
/// function of type \p FTy, then advance \p ArgNo by one.
///
/// For a fixed parameter position the types must match exactly; a position
/// past the fixed parameters is only valid for a variadic function type.
/// The checks are asserts only; \p ArgNo is always incremented.
static void checkArgMatches(llvm::Value *Elt, unsigned &ArgNo,
                            llvm::FunctionType *FTy) {
  const bool PastFixedParams = ArgNo >= FTy->getNumParams();
  if (PastFixedParams) {
    assert(FTy->isVarArg());
  } else {
    assert(Elt->getType() == FTy->getParamType(ArgNo));
  }
  ArgNo += 1;
}
|
|
|
|
|
2011-07-12 14:29:11 +08:00
|
|
|
void CodeGenFunction::ExpandTypeToArgs(QualType Ty, RValue RV,
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Value*,16> &Args,
|
2011-07-12 14:29:11 +08:00
|
|
|
llvm::FunctionType *IRFuncTy) {
|
2011-08-03 13:58:22 +08:00
|
|
|
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
|
|
|
|
unsigned NumElts = AT->getSize().getZExtValue();
|
|
|
|
QualType EltTy = AT->getElementType();
|
|
|
|
llvm::Value *Addr = RV.getAggregateAddr();
|
|
|
|
for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
|
|
|
|
llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, Elt);
|
|
|
|
LValue LV = MakeAddrLValue(EltAddr, EltTy);
|
|
|
|
RValue EltRV;
|
2011-11-15 10:46:03 +08:00
|
|
|
if (EltTy->isAnyComplexType())
|
|
|
|
// FIXME: Volatile?
|
|
|
|
EltRV = RValue::getComplex(LoadComplexFromAddr(LV.getAddress(), false));
|
|
|
|
else if (CodeGenFunction::hasAggregateLLVMType(EltTy))
|
2011-12-03 11:08:40 +08:00
|
|
|
EltRV = LV.asAggregateRValue();
|
2011-08-03 13:58:22 +08:00
|
|
|
else
|
|
|
|
EltRV = EmitLoadOfLValue(LV);
|
|
|
|
ExpandTypeToArgs(EltTy, EltRV, Args, IRFuncTy);
|
2011-07-12 14:29:11 +08:00
|
|
|
}
|
2011-08-03 13:58:22 +08:00
|
|
|
} else if (const RecordType *RT = Ty->getAsStructureType()) {
|
|
|
|
RecordDecl *RD = RT->getDecl();
|
|
|
|
assert(RV.isAggregate() && "Unexpected rvalue during struct expansion");
|
|
|
|
llvm::Value *Addr = RV.getAggregateAddr();
|
|
|
|
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
|
|
|
|
i != e; ++i) {
|
|
|
|
FieldDecl *FD = *i;
|
|
|
|
QualType FT = FD->getType();
|
2011-07-12 14:29:11 +08:00
|
|
|
|
2011-08-03 13:58:22 +08:00
|
|
|
// FIXME: What are the right qualifiers here?
|
|
|
|
LValue LV = EmitLValueForField(Addr, FD, 0);
|
|
|
|
RValue FldRV;
|
2011-11-15 10:46:03 +08:00
|
|
|
if (FT->isAnyComplexType())
|
|
|
|
// FIXME: Volatile?
|
|
|
|
FldRV = RValue::getComplex(LoadComplexFromAddr(LV.getAddress(), false));
|
|
|
|
else if (CodeGenFunction::hasAggregateLLVMType(FT))
|
2011-12-03 11:08:40 +08:00
|
|
|
FldRV = LV.asAggregateRValue();
|
2011-08-03 13:58:22 +08:00
|
|
|
else
|
|
|
|
FldRV = EmitLoadOfLValue(LV);
|
|
|
|
ExpandTypeToArgs(FT, FldRV, Args, IRFuncTy);
|
|
|
|
}
|
2011-11-15 10:46:03 +08:00
|
|
|
} else if (Ty->isAnyComplexType()) {
|
2011-08-03 13:58:22 +08:00
|
|
|
ComplexPairTy CV = RV.getComplexVal();
|
|
|
|
Args.push_back(CV.first);
|
|
|
|
Args.push_back(CV.second);
|
|
|
|
} else {
|
2011-07-12 14:29:11 +08:00
|
|
|
assert(RV.isScalar() &&
|
|
|
|
"Unexpected non-scalar rvalue during struct expansion.");
|
|
|
|
|
|
|
|
// Insert a bitcast as needed.
|
|
|
|
llvm::Value *V = RV.getScalarVal();
|
|
|
|
if (Args.size() < IRFuncTy->getNumParams() &&
|
|
|
|
V->getType() != IRFuncTy->getParamType(Args.size()))
|
|
|
|
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(Args.size()));
|
|
|
|
|
|
|
|
Args.push_back(V);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-02-03 06:03:45 +08:00
|
|
|
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
2009-09-09 23:08:12 +08:00
|
|
|
llvm::Value *Callee,
|
2009-12-25 03:25:24 +08:00
|
|
|
ReturnValueSlot ReturnValue,
|
2009-02-21 02:06:48 +08:00
|
|
|
const CallArgList &CallArgs,
|
2010-05-01 19:15:56 +08:00
|
|
|
const Decl *TargetDecl,
|
2010-05-02 21:41:58 +08:00
|
|
|
llvm::Instruction **callOrInvoke) {
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: We no longer need the types from CallArgs; lift up and simplify.
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<llvm::Value*, 16> Args;
|
2008-09-10 07:27:19 +08:00
|
|
|
|
|
|
|
// Handle struct-return functions by passing a pointer to the
|
|
|
|
// location that we would like to return into.
|
2009-02-03 05:43:58 +08:00
|
|
|
QualType RetTy = CallInfo.getReturnType();
|
2009-02-03 13:59:18 +08:00
|
|
|
const ABIArgInfo &RetAI = CallInfo.getReturnInfo();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-07-12 12:46:18 +08:00
|
|
|
// IRArgNo - Keep track of the argument number in the callee we're looking at.
|
|
|
|
unsigned IRArgNo = 0;
|
|
|
|
llvm::FunctionType *IRFuncTy =
|
|
|
|
cast<llvm::FunctionType>(
|
|
|
|
cast<llvm::PointerType>(Callee->getType())->getElementType());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// If the call returns a temporary with struct return, create a temporary
|
2009-12-25 04:40:36 +08:00
|
|
|
// alloca to hold the result, unless one is given to us.
|
2010-07-15 07:39:36 +08:00
|
|
|
if (CGM.ReturnTypeUsesSRet(CallInfo)) {
|
2009-12-25 04:40:36 +08:00
|
|
|
llvm::Value *Value = ReturnValue.getValue();
|
|
|
|
if (!Value)
|
2010-02-09 10:48:28 +08:00
|
|
|
Value = CreateMemTemp(RetTy);
|
2009-12-25 04:40:36 +08:00
|
|
|
Args.push_back(Value);
|
2011-07-12 12:46:18 +08:00
|
|
|
checkArgMatches(Value, IRArgNo, IRFuncTy);
|
2009-12-25 04:40:36 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-02-05 05:17:21 +08:00
|
|
|
assert(CallInfo.arg_size() == CallArgs.size() &&
|
|
|
|
"Mismatch between function signature & arguments.");
|
2009-02-03 13:59:18 +08:00
|
|
|
CGFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin();
|
2009-09-09 23:08:12 +08:00
|
|
|
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
|
2009-02-03 13:59:18 +08:00
|
|
|
I != E; ++I, ++info_it) {
|
|
|
|
const ABIArgInfo &ArgInfo = info_it->info;
|
2011-05-03 02:05:27 +08:00
|
|
|
RValue RV = I->RV;
|
2008-09-17 08:51:38 +08:00
|
|
|
|
2011-06-16 06:09:18 +08:00
|
|
|
unsigned TypeAlign =
|
2011-05-03 02:05:27 +08:00
|
|
|
getContext().getTypeAlignInChars(I->Ty).getQuantity();
|
2008-09-17 08:51:38 +08:00
|
|
|
switch (ArgInfo.getKind()) {
|
2010-08-21 10:24:36 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
2009-02-05 17:16:39 +08:00
|
|
|
if (RV.isScalar() || RV.isComplex()) {
|
|
|
|
// Make a temporary alloca to pass the argument.
|
2011-06-16 02:26:32 +08:00
|
|
|
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
|
|
|
|
if (ArgInfo.getIndirectAlign() > AI->getAlignment())
|
|
|
|
AI->setAlignment(ArgInfo.getIndirectAlign());
|
|
|
|
Args.push_back(AI);
|
2011-07-12 12:46:18 +08:00
|
|
|
|
2009-02-05 17:16:39 +08:00
|
|
|
if (RV.isScalar())
|
2010-08-21 10:24:36 +08:00
|
|
|
EmitStoreOfScalar(RV.getScalarVal(), Args.back(), false,
|
2011-06-16 06:09:18 +08:00
|
|
|
TypeAlign, I->Ty);
|
2009-02-05 17:16:39 +08:00
|
|
|
else
|
2009-09-09 23:08:12 +08:00
|
|
|
StoreComplexToAddr(RV.getComplexVal(), Args.back(), false);
|
2011-07-12 12:46:18 +08:00
|
|
|
|
|
|
|
// Validate argument match.
|
|
|
|
checkArgMatches(AI, IRArgNo, IRFuncTy);
|
2009-02-05 17:16:39 +08:00
|
|
|
} else {
|
2011-06-14 09:37:52 +08:00
|
|
|
// We want to avoid creating an unnecessary temporary+copy here;
|
|
|
|
// however, we need one in two cases:
|
|
|
|
// 1. If the argument is not byval, and we are required to copy the
|
|
|
|
// source. (This case doesn't occur on any common architecture.)
|
|
|
|
// 2. If the argument is byval, RV is not sufficiently aligned, and
|
|
|
|
// we cannot force it to be sufficiently aligned.
|
2011-06-16 06:09:18 +08:00
|
|
|
llvm::Value *Addr = RV.getAggregateAddr();
|
|
|
|
unsigned Align = ArgInfo.getIndirectAlign();
|
|
|
|
const llvm::TargetData *TD = &CGM.getTargetData();
|
|
|
|
if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
|
|
|
|
(ArgInfo.getIndirectByVal() && TypeAlign < Align &&
|
|
|
|
llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align)) {
|
2011-06-14 09:37:52 +08:00
|
|
|
// Create an aligned temporary, and copy to it.
|
2011-06-16 06:09:18 +08:00
|
|
|
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
|
|
|
|
if (Align > AI->getAlignment())
|
|
|
|
AI->setAlignment(Align);
|
2011-06-14 09:37:52 +08:00
|
|
|
Args.push_back(AI);
|
2011-06-16 06:09:18 +08:00
|
|
|
EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
|
2011-07-12 12:46:18 +08:00
|
|
|
|
|
|
|
// Validate argument match.
|
|
|
|
checkArgMatches(AI, IRArgNo, IRFuncTy);
|
2011-06-14 09:37:52 +08:00
|
|
|
} else {
|
|
|
|
// Skip the extra memcpy call.
|
2011-06-16 06:09:18 +08:00
|
|
|
Args.push_back(Addr);
|
2011-07-12 12:46:18 +08:00
|
|
|
|
|
|
|
// Validate argument match.
|
|
|
|
checkArgMatches(Addr, IRArgNo, IRFuncTy);
|
2011-06-14 09:37:52 +08:00
|
|
|
}
|
2009-02-05 17:16:39 +08:00
|
|
|
}
|
|
|
|
break;
|
2010-08-21 10:24:36 +08:00
|
|
|
}
|
2009-02-05 17:16:39 +08:00
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
|
|
|
break;
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Extend:
|
|
|
|
case ABIArgInfo::Direct: {
|
2012-01-07 08:25:33 +08:00
|
|
|
// Insert a padding argument to ensure proper alignment.
|
|
|
|
if (llvm::Type *PaddingType = ArgInfo.getPaddingType()) {
|
|
|
|
Args.push_back(llvm::UndefValue::get(PaddingType));
|
|
|
|
++IRArgNo;
|
|
|
|
}
|
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (!isa<llvm::StructType>(ArgInfo.getCoerceToType()) &&
|
2010-07-30 12:02:24 +08:00
|
|
|
ArgInfo.getCoerceToType() == ConvertType(info_it->type) &&
|
|
|
|
ArgInfo.getDirectOffset() == 0) {
|
2011-07-12 12:46:18 +08:00
|
|
|
llvm::Value *V;
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (RV.isScalar())
|
2011-07-12 12:46:18 +08:00
|
|
|
V = RV.getScalarVal();
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
else
|
2011-07-12 12:46:18 +08:00
|
|
|
V = Builder.CreateLoad(RV.getAggregateAddr());
|
|
|
|
|
2011-07-12 12:53:39 +08:00
|
|
|
// If the argument doesn't match, perform a bitcast to coerce it. This
|
|
|
|
// can happen due to trivial type mismatches.
|
|
|
|
if (IRArgNo < IRFuncTy->getNumParams() &&
|
|
|
|
V->getType() != IRFuncTy->getParamType(IRArgNo))
|
|
|
|
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(IRArgNo));
|
2011-07-12 12:46:18 +08:00
|
|
|
Args.push_back(V);
|
|
|
|
|
|
|
|
checkArgMatches(V, IRArgNo, IRFuncTy);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
break;
|
|
|
|
}
|
2009-01-27 05:26:08 +08:00
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
// FIXME: Avoid the conversion through memory if possible.
|
|
|
|
llvm::Value *SrcPtr;
|
|
|
|
if (RV.isScalar()) {
|
2011-05-03 02:05:27 +08:00
|
|
|
SrcPtr = CreateMemTemp(I->Ty, "coerce");
|
2011-06-16 06:09:18 +08:00
|
|
|
EmitStoreOfScalar(RV.getScalarVal(), SrcPtr, false, TypeAlign, I->Ty);
|
2009-02-04 03:12:28 +08:00
|
|
|
} else if (RV.isComplex()) {
|
2011-05-03 02:05:27 +08:00
|
|
|
SrcPtr = CreateMemTemp(I->Ty, "coerce");
|
2009-02-04 03:12:28 +08:00
|
|
|
StoreComplexToAddr(RV.getComplexVal(), SrcPtr, false);
|
2009-09-09 23:08:12 +08:00
|
|
|
} else
|
2009-02-04 03:12:28 +08:00
|
|
|
SrcPtr = RV.getAggregateAddr();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
if (unsigned Offs = ArgInfo.getDirectOffset()) {
|
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr, Builder.getInt8PtrTy());
|
|
|
|
SrcPtr = Builder.CreateConstGEP1_32(SrcPtr, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(ArgInfo.getCoerceToType()));
|
|
|
|
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
// If the coerce-to type is a first class aggregate, we flatten it and
|
|
|
|
// pass the elements. Either way is semantically identical, but fast-isel
|
|
|
|
// and the optimizer generally likes scalar values better than FCAs.
|
2011-07-18 12:24:23 +08:00
|
|
|
if (llvm::StructType *STy =
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
|
2010-07-06 04:41:41 +08:00
|
|
|
SrcPtr = Builder.CreateBitCast(SrcPtr,
|
|
|
|
llvm::PointerType::getUnqual(STy));
|
|
|
|
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
|
|
|
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i);
|
2010-07-29 02:24:28 +08:00
|
|
|
llvm::LoadInst *LI = Builder.CreateLoad(EltPtr);
|
|
|
|
// We don't know what we're loading from.
|
|
|
|
LI->setAlignment(1);
|
|
|
|
Args.push_back(LI);
|
2011-07-12 12:46:18 +08:00
|
|
|
|
|
|
|
// Validate argument match.
|
|
|
|
checkArgMatches(LI, IRArgNo, IRFuncTy);
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
}
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
} else {
|
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For
example, before we made:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue, which fastisel hates, now we
make:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when x86-64 abi lowering decides it likes us.
llvm-svn: 107104
2010-06-29 08:06:42 +08:00
|
|
|
// In the simple case, just pass the coerced loaded value.
|
|
|
|
Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
|
|
|
|
*this));
|
2011-07-12 12:46:18 +08:00
|
|
|
|
|
|
|
// Validate argument match.
|
|
|
|
checkArgMatches(Args.back(), IRArgNo, IRFuncTy);
|
Change CGCall to handle the "coerce" case where the coerce-to type
is a FCA to pass each of the elements as individual scalars. This
produces code fast isel is less likely to reject and is easier on
the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
return D.NumDecls+D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
%3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
%4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
store %0 %3, %0* %4, align 1
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
%tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
%add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
ret i8* %add.ptr
}
Elimination of the FCA inside the function is still-to-come.
llvm-svn: 107099
2010-06-29 07:44:11 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-02-04 03:12:28 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-09-17 08:51:38 +08:00
|
|
|
case ABIArgInfo::Expand:
|
2011-07-12 14:29:11 +08:00
|
|
|
ExpandTypeToArgs(I->Ty, RV, Args, IRFuncTy);
|
2011-07-12 12:46:18 +08:00
|
|
|
IRArgNo = Args.size();
|
2008-09-17 08:51:38 +08:00
|
|
|
break;
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// If the callee is a bitcast of a function to a varargs pointer to function
|
|
|
|
// type, check to see if we can remove the bitcast. This handles some cases
|
|
|
|
// with unprototyped functions.
|
|
|
|
if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Callee))
|
|
|
|
if (llvm::Function *CalleeF = dyn_cast<llvm::Function>(CE->getOperand(0))) {
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::PointerType *CurPT=cast<llvm::PointerType>(Callee->getType());
|
|
|
|
llvm::FunctionType *CurFT =
|
2009-06-13 08:26:38 +08:00
|
|
|
cast<llvm::FunctionType>(CurPT->getElementType());
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::FunctionType *ActualFT = CalleeF->getFunctionType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
if (CE->getOpcode() == llvm::Instruction::BitCast &&
|
|
|
|
ActualFT->getReturnType() == CurFT->getReturnType() &&
|
2009-06-23 09:38:41 +08:00
|
|
|
ActualFT->getNumParams() == CurFT->getNumParams() &&
|
2011-03-02 01:28:13 +08:00
|
|
|
ActualFT->getNumParams() == Args.size() &&
|
|
|
|
(CurFT->isVarArg() || !ActualFT->isVarArg())) {
|
2009-06-13 08:26:38 +08:00
|
|
|
bool ArgsMatch = true;
|
|
|
|
for (unsigned i = 0, e = ActualFT->getNumParams(); i != e; ++i)
|
|
|
|
if (ActualFT->getParamType(i) != CurFT->getParamType(i)) {
|
|
|
|
ArgsMatch = false;
|
|
|
|
break;
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-06-13 08:26:38 +08:00
|
|
|
// Strip the cast if we can get away with it. This is a nice cleanup,
|
|
|
|
// but also allows us to inline the function at -O0 if it is marked
|
|
|
|
// always_inline.
|
|
|
|
if (ArgsMatch)
|
|
|
|
Callee = CalleeF;
|
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-09-12 08:59:20 +08:00
|
|
|
unsigned CallingConv;
|
2008-09-26 05:02:23 +08:00
|
|
|
CodeGen::AttributeListType AttributeList;
|
2009-09-12 08:59:20 +08:00
|
|
|
CGM.ConstructAttributeList(CallInfo, TargetDecl, AttributeList, CallingConv);
|
2009-02-24 01:26:39 +08:00
|
|
|
llvm::AttrListPtr Attrs = llvm::AttrListPtr::get(AttributeList.begin(),
|
|
|
|
AttributeList.end());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
llvm::BasicBlock *InvokeDest = 0;
|
|
|
|
if (!(Attrs.getFnAttributes() & llvm::Attribute::NoUnwind))
|
|
|
|
InvokeDest = getInvokeDest();
|
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
llvm::CallSite CS;
|
2010-07-06 09:34:17 +08:00
|
|
|
if (!InvokeDest) {
|
2011-07-15 16:37:34 +08:00
|
|
|
CS = Builder.CreateCall(Callee, Args);
|
2009-02-24 01:26:39 +08:00
|
|
|
} else {
|
|
|
|
llvm::BasicBlock *Cont = createBasicBlock("invoke.cont");
|
2011-07-15 16:37:34 +08:00
|
|
|
CS = Builder.CreateInvoke(Callee, Cont, InvokeDest, Args);
|
2009-02-24 01:26:39 +08:00
|
|
|
EmitBlock(Cont);
|
2009-02-21 02:54:31 +08:00
|
|
|
}
|
2010-06-30 00:40:28 +08:00
|
|
|
if (callOrInvoke)
|
2010-05-02 21:41:58 +08:00
|
|
|
*callOrInvoke = CS.getInstruction();
|
2009-02-21 02:54:31 +08:00
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
CS.setAttributes(Attrs);
|
2009-09-12 08:59:20 +08:00
|
|
|
CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
|
2009-03-02 12:32:35 +08:00
|
|
|
|
2012-02-16 08:57:37 +08:00
|
|
|
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
|
|
|
|
// optimizer it can aggressively ignore unwind edges.
|
|
|
|
if (CGM.getLangOptions().ObjCAutoRefCount)
|
|
|
|
AddObjCARCExceptionMetadata(CS.getInstruction());
|
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
// If the call doesn't return, finish the basic block and clear the
|
|
|
|
// insertion point; this allows the rest of IRgen to discard
|
|
|
|
// unreachable code.
|
|
|
|
if (CS.doesNotReturn()) {
|
|
|
|
Builder.CreateUnreachable();
|
|
|
|
Builder.ClearInsertionPoint();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-16 15:57:57 +08:00
|
|
|
// FIXME: For now, emit a dummy basic block because expr emitters in
|
|
|
|
// general are not ready to handle emitting expressions at unreachable
|
|
|
|
// points.
|
2009-03-02 12:32:35 +08:00
|
|
|
EnsureInsertPoint();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-03-02 12:32:35 +08:00
|
|
|
// Return a reasonable RValue.
|
|
|
|
return GetUndefRValue(RetTy);
|
2009-09-09 23:08:12 +08:00
|
|
|
}
|
2009-03-02 12:32:35 +08:00
|
|
|
|
|
|
|
llvm::Instruction *CI = CS.getInstruction();
|
2009-10-05 21:47:21 +08:00
|
|
|
if (Builder.isNamePreserving() && !CI->getType()->isVoidTy())
|
2008-09-10 07:27:19 +08:00
|
|
|
CI->setName("call");
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2011-06-16 07:02:42 +08:00
|
|
|
// Emit any writebacks immediately. Arguably this should happen
|
|
|
|
// after any return-value munging.
|
|
|
|
if (CallArgs.hasWritebacks())
|
|
|
|
emitWritebacks(*this, CallArgs);
|
|
|
|
|
2008-09-10 10:41:04 +08:00
|
|
|
switch (RetAI.getKind()) {
|
2010-08-21 10:24:36 +08:00
|
|
|
case ABIArgInfo::Indirect: {
|
|
|
|
unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity();
|
2008-09-10 10:41:04 +08:00
|
|
|
if (RetTy->isAnyComplexType())
|
2008-09-17 08:51:38 +08:00
|
|
|
return RValue::getComplex(LoadComplexFromAddr(Args[0], false));
|
2009-03-22 08:32:22 +08:00
|
|
|
if (CodeGenFunction::hasAggregateLLVMType(RetTy))
|
2008-09-17 08:51:38 +08:00
|
|
|
return RValue::getAggregate(Args[0]);
|
2010-08-21 10:24:36 +08:00
|
|
|
return RValue::get(EmitLoadOfScalar(Args[0], false, Alignment, RetTy));
|
|
|
|
}
|
2008-09-11 09:48:57 +08:00
|
|
|
|
2009-01-27 05:26:08 +08:00
|
|
|
case ABIArgInfo::Ignore:
|
2009-02-03 14:30:17 +08:00
|
|
|
// If we are ignoring an argument that had a result, make sure to
|
|
|
|
// construct the appropriate return value for our caller.
|
2009-02-05 15:09:07 +08:00
|
|
|
return GetUndefRValue(RetTy);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
case ABIArgInfo::Extend:
|
|
|
|
case ABIArgInfo::Direct: {
|
2011-07-13 11:59:32 +08:00
|
|
|
llvm::Type *RetIRTy = ConvertType(RetTy);
|
|
|
|
if (RetAI.getCoerceToType() == RetIRTy && RetAI.getDirectOffset() == 0) {
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (RetTy->isAnyComplexType()) {
|
|
|
|
llvm::Value *Real = Builder.CreateExtractValue(CI, 0);
|
|
|
|
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
|
|
|
|
return RValue::getComplex(std::make_pair(Real, Imag));
|
|
|
|
}
|
|
|
|
if (CodeGenFunction::hasAggregateLLVMType(RetTy)) {
|
|
|
|
llvm::Value *DestPtr = ReturnValue.getValue();
|
|
|
|
bool DestIsVolatile = ReturnValue.isVolatile();
|
2009-01-27 05:26:08 +08:00
|
|
|
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
if (!DestPtr) {
|
|
|
|
DestPtr = CreateMemTemp(RetTy, "agg.tmp");
|
|
|
|
DestIsVolatile = false;
|
|
|
|
}
|
2011-05-18 05:08:01 +08:00
|
|
|
BuildAggStore(*this, CI, DestPtr, DestIsVolatile, false);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
return RValue::getAggregate(DestPtr);
|
|
|
|
}
|
2011-07-13 11:59:32 +08:00
|
|
|
|
|
|
|
// If the argument doesn't match, perform a bitcast to coerce it. This
|
|
|
|
// can happen due to trivial type mismatches.
|
|
|
|
llvm::Value *V = CI;
|
|
|
|
if (V->getType() != RetIRTy)
|
|
|
|
V = Builder.CreateBitCast(V, RetIRTy);
|
|
|
|
return RValue::get(V);
|
Kill off the 'coerce' ABI passing form. Now 'direct' and 'extend' always
have a "coerce to" type which often matches the default lowering of Clang
type to LLVM IR type, but the coerce case can be handled by making them
not be the same.
This simplifies things and fixes issues where X86-64 abi lowering would
return coerce after making preferred types exactly match up. This caused
us to compile:
typedef float v4f32 __attribute__((__vector_size__(16)));
v4f32 foo(v4f32 X) {
return X+X;
}
into this code at -O0:
define <4 x float> @foo(<4 x float> %X.coerce) nounwind {
entry:
%retval = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%coerce = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X.coerce, <4 x float>* %coerce
%X = load <4 x float>* %coerce ; <<4 x float>> [#uses=1]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
store <4 x float> %add, <4 x float>* %retval
%0 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %0
}
Now we get:
define <4 x float> @foo(<4 x float> %X) nounwind {
entry:
%X.addr = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
store <4 x float> %X, <4 x float>* %X.addr
%tmp = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%tmp1 = load <4 x float>* %X.addr ; <<4 x float>> [#uses=1]
%add = fadd <4 x float> %tmp, %tmp1 ; <<4 x float>> [#uses=1]
ret <4 x float> %add
}
This implements rdar://8248065
llvm-svn: 109733
2010-07-29 14:26:06 +08:00
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-12-25 04:40:36 +08:00
|
|
|
llvm::Value *DestPtr = ReturnValue.getValue();
|
|
|
|
bool DestIsVolatile = ReturnValue.isVolatile();
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2009-12-25 04:40:36 +08:00
|
|
|
if (!DestPtr) {
|
2010-02-09 10:48:28 +08:00
|
|
|
DestPtr = CreateMemTemp(RetTy, "coerce");
|
2009-12-25 04:40:36 +08:00
|
|
|
DestIsVolatile = false;
|
|
|
|
}
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-07-30 12:02:24 +08:00
|
|
|
// If the value is offset in memory, apply the offset now.
|
|
|
|
llvm::Value *StorePtr = DestPtr;
|
|
|
|
if (unsigned Offs = RetAI.getDirectOffset()) {
|
|
|
|
StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy());
|
|
|
|
StorePtr = Builder.CreateConstGEP1_32(StorePtr, Offs);
|
2010-10-19 14:39:39 +08:00
|
|
|
StorePtr = Builder.CreateBitCast(StorePtr,
|
2010-07-30 12:02:24 +08:00
|
|
|
llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
|
|
|
|
}
|
|
|
|
CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
|
2010-10-19 14:39:39 +08:00
|
|
|
|
2010-08-21 10:24:36 +08:00
|
|
|
unsigned Alignment = getContext().getTypeAlignInChars(RetTy).getQuantity();
|
2008-11-26 06:21:48 +08:00
|
|
|
if (RetTy->isAnyComplexType())
|
2009-12-25 04:40:36 +08:00
|
|
|
return RValue::getComplex(LoadComplexFromAddr(DestPtr, false));
|
2009-03-22 08:32:22 +08:00
|
|
|
if (CodeGenFunction::hasAggregateLLVMType(RetTy))
|
2009-12-25 04:40:36 +08:00
|
|
|
return RValue::getAggregate(DestPtr);
|
2010-08-21 10:24:36 +08:00
|
|
|
return RValue::get(EmitLoadOfScalar(DestPtr, false, Alignment, RetTy));
|
2008-09-10 15:04:09 +08:00
|
|
|
}
|
2008-09-11 09:48:57 +08:00
|
|
|
|
|
|
|
case ABIArgInfo::Expand:
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Invalid ABI kind for return argument");
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2008-09-10 10:41:04 +08:00
|
|
|
|
2011-09-23 13:06:16 +08:00
|
|
|
llvm_unreachable("Unhandled ABIArgInfo::Kind");
|
2008-09-10 07:27:19 +08:00
|
|
|
}
|
2009-02-11 04:44:09 +08:00
|
|
|
|
|
|
|
/* VarArg handling */
|
|
|
|
|
|
|
|
/// Thin forwarding wrapper: looks up the ABI info object through
/// CGM.getTypes().getABIInfo() and calls its EmitVAArg with the same
/// VAListAddr and Ty, plus this CodeGenFunction as context. The value that
/// call produces is returned unchanged; no other work is done here.
///
/// NOTE(review): presumably VAListAddr is the address of the va_list object
/// and Ty is the type of the argument being fetched -- confirm against the
/// ABIInfo::EmitVAArg declaration.
llvm::Value *CodeGenFunction::EmitVAArg(llvm::Value *VAListAddr, QualType Ty) {
|
|
|
|
return CGM.getTypes().getABIInfo().EmitVAArg(VAListAddr, Ty, *this);
|
|
|
|
}
|