forked from OSchip/llvm-project
Fix indirect byval passing of records that reside in a non-default address space. Allocate a temporary on the stack and memcpy the actual value into it before the call.
llvm-svn: 176786
This commit is contained in:
parent
f0803370cb
commit
3832bfd557
|
@ -2199,17 +2199,23 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
|||
checkArgMatches(AI, IRArgNo, IRFuncTy);
|
||||
} else {
|
||||
// We want to avoid creating an unnecessary temporary+copy here;
|
||||
// however, we need one in two cases:
|
||||
// however, we need one in three cases:
|
||||
// 1. If the argument is not byval, and we are required to copy the
|
||||
// source. (This case doesn't occur on any common architecture.)
|
||||
// 2. If the argument is byval, RV is not sufficiently aligned, and
|
||||
// we cannot force it to be sufficiently aligned.
|
||||
// 3. If the argument is byval, but RV is located in an address space
|
||||
// different than that of the argument (0).
|
||||
llvm::Value *Addr = RV.getAggregateAddr();
|
||||
unsigned Align = ArgInfo.getIndirectAlign();
|
||||
const llvm::DataLayout *TD = &CGM.getDataLayout();
|
||||
const unsigned RVAddrSpace = Addr->getType()->getPointerAddressSpace();
|
||||
const unsigned ArgAddrSpace = (IRArgNo < IRFuncTy->getNumParams() ?
|
||||
IRFuncTy->getParamType(IRArgNo)->getPointerAddressSpace() : 0);
|
||||
if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
|
||||
(ArgInfo.getIndirectByVal() && TypeAlign.getQuantity() < Align &&
|
||||
llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align)) {
|
||||
llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align) ||
|
||||
(ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
|
||||
// Create an aligned temporary, and copy to it.
|
||||
llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
|
||||
if (Align > AI->getAlignment())
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
|
||||
|
||||
typedef struct {
|
||||
int cells[9];
|
||||
} Mat3X3;
|
||||
|
||||
typedef struct {
|
||||
int cells[16];
|
||||
} Mat4X4;
|
||||
|
||||
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
|
||||
Mat4X4 out;
|
||||
return out;
|
||||
}
|
||||
|
||||
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
|
||||
out[0] = foo(in[1]);
|
||||
}
|
||||
|
||||
// Expect two mem copies: one for the argument "in", and one for
|
||||
// the return value.
|
||||
// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
|
||||
// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
|
Loading…
Reference in New Issue