forked from OSchip/llvm-project
clang: Use byref for aggregate kernel arguments
Add address space to indirect abi info and use it for kernels. Previously, indirect arguments assumed assumed a stack passed object in the alloca address space using byval. A stack pointer is unsuitable for kernel arguments, which are passed in a separate, constant buffer with a different address space. Start using the new byref for aggregate kernel arguments. Previously these were emitted as raw struct arguments, and turned into loads in the backend. These will lower identically, although with byref you now have the option of applying an explicit alignment. In the future, a reasonable implementation would use byref for all kernel arguments (this would be a practical problem at the moment due to losing things like noalias on pointer arguments). This is mostly to avoid fighting the optimizer's treatment of aggregate load/store. SROA and instcombine both turn aggregate loads and stores into a long sequence of element loads and stores, rather than the optimizable memcpy I would expect in this situation. Now an explicit memcpy will be introduced up-front which is better understood and helps eliminate the alloca in more situations. This skips using byref in the case where HIP kernel pointer arguments in structs are promoted to global pointers. At minimum an additional patch is needed to allow coercion with indirect arguments. This also skips using it for OpenCL due to the current workaround used to support kernels calling kernels. Distinct function bodies would need to be generated up front instead of emitting an illegal call.
This commit is contained in:
parent
c9bcc237a2
commit
30eeb742f1
|
@ -44,10 +44,23 @@ public:
|
||||||
/// but also emit a zero/sign extension attribute.
|
/// but also emit a zero/sign extension attribute.
|
||||||
Extend,
|
Extend,
|
||||||
|
|
||||||
/// Indirect - Pass the argument indirectly via a hidden pointer
|
/// Indirect - Pass the argument indirectly via a hidden pointer with the
|
||||||
/// with the specified alignment (0 indicates default alignment).
|
/// specified alignment (0 indicates default alignment) and address space.
|
||||||
Indirect,
|
Indirect,
|
||||||
|
|
||||||
|
/// IndirectAliased - Similar to Indirect, but the pointer may be to an
|
||||||
|
/// object that is otherwise referenced. The object is known to not be
|
||||||
|
/// modified through any other references for the duration of the call, and
|
||||||
|
/// the callee must not itself modify the object. Because C allows
|
||||||
|
/// parameter variables to be modified and guarantees that they have unique
|
||||||
|
/// addresses, the callee must defensively copy the object into a local
|
||||||
|
/// variable if it might be modified or its address might be compared.
|
||||||
|
/// Since those are uncommon, in principle this convention allows programs
|
||||||
|
/// to avoid copies in more situations. However, it may introduce *extra*
|
||||||
|
/// copies if the callee fails to prove that a copy is unnecessary and the
|
||||||
|
/// caller naturally produces an unaliased object for the argument.
|
||||||
|
IndirectAliased,
|
||||||
|
|
||||||
/// Ignore - Ignore the argument (treat as void). Useful for void and
|
/// Ignore - Ignore the argument (treat as void). Useful for void and
|
||||||
/// empty structs.
|
/// empty structs.
|
||||||
Ignore,
|
Ignore,
|
||||||
|
@ -86,6 +99,7 @@ private:
|
||||||
unsigned AllocaFieldIndex; // isInAlloca()
|
unsigned AllocaFieldIndex; // isInAlloca()
|
||||||
};
|
};
|
||||||
Kind TheKind;
|
Kind TheKind;
|
||||||
|
unsigned IndirectAddrSpace : 24; // isIndirect()
|
||||||
bool PaddingInReg : 1;
|
bool PaddingInReg : 1;
|
||||||
bool InAllocaSRet : 1; // isInAlloca()
|
bool InAllocaSRet : 1; // isInAlloca()
|
||||||
bool InAllocaIndirect : 1;// isInAlloca()
|
bool InAllocaIndirect : 1;// isInAlloca()
|
||||||
|
@ -97,7 +111,8 @@ private:
|
||||||
bool SignExt : 1; // isExtend()
|
bool SignExt : 1; // isExtend()
|
||||||
|
|
||||||
bool canHavePaddingType() const {
|
bool canHavePaddingType() const {
|
||||||
return isDirect() || isExtend() || isIndirect() || isExpand();
|
return isDirect() || isExtend() || isIndirect() || isIndirectAliased() ||
|
||||||
|
isExpand();
|
||||||
}
|
}
|
||||||
void setPaddingType(llvm::Type *T) {
|
void setPaddingType(llvm::Type *T) {
|
||||||
assert(canHavePaddingType());
|
assert(canHavePaddingType());
|
||||||
|
@ -112,9 +127,10 @@ private:
|
||||||
public:
|
public:
|
||||||
ABIArgInfo(Kind K = Direct)
|
ABIArgInfo(Kind K = Direct)
|
||||||
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
|
: TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
|
||||||
PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
|
IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false),
|
||||||
IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
|
InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false),
|
||||||
InReg(false), CanBeFlattened(false), SignExt(false) {}
|
SRetAfterThis(false), InReg(false), CanBeFlattened(false),
|
||||||
|
SignExt(false) {}
|
||||||
|
|
||||||
static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
|
static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
|
||||||
llvm::Type *Padding = nullptr,
|
llvm::Type *Padding = nullptr,
|
||||||
|
@ -180,6 +196,19 @@ public:
|
||||||
AI.setPaddingType(Padding);
|
AI.setPaddingType(Padding);
|
||||||
return AI;
|
return AI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pass this in memory using the IR byref attribute.
|
||||||
|
static ABIArgInfo getIndirectAliased(CharUnits Alignment, unsigned AddrSpace,
|
||||||
|
bool Realign = false,
|
||||||
|
llvm::Type *Padding = nullptr) {
|
||||||
|
auto AI = ABIArgInfo(IndirectAliased);
|
||||||
|
AI.setIndirectAlign(Alignment);
|
||||||
|
AI.setIndirectRealign(Realign);
|
||||||
|
AI.setPaddingType(Padding);
|
||||||
|
AI.setIndirectAddrSpace(AddrSpace);
|
||||||
|
return AI;
|
||||||
|
}
|
||||||
|
|
||||||
static ABIArgInfo getIndirectInReg(CharUnits Alignment, bool ByVal = true,
|
static ABIArgInfo getIndirectInReg(CharUnits Alignment, bool ByVal = true,
|
||||||
bool Realign = false) {
|
bool Realign = false) {
|
||||||
auto AI = getIndirect(Alignment, ByVal, Realign);
|
auto AI = getIndirect(Alignment, ByVal, Realign);
|
||||||
|
@ -259,6 +288,7 @@ public:
|
||||||
bool isExtend() const { return TheKind == Extend; }
|
bool isExtend() const { return TheKind == Extend; }
|
||||||
bool isIgnore() const { return TheKind == Ignore; }
|
bool isIgnore() const { return TheKind == Ignore; }
|
||||||
bool isIndirect() const { return TheKind == Indirect; }
|
bool isIndirect() const { return TheKind == Indirect; }
|
||||||
|
bool isIndirectAliased() const { return TheKind == IndirectAliased; }
|
||||||
bool isExpand() const { return TheKind == Expand; }
|
bool isExpand() const { return TheKind == Expand; }
|
||||||
bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; }
|
bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; }
|
||||||
|
|
||||||
|
@ -338,11 +368,11 @@ public:
|
||||||
|
|
||||||
// Indirect accessors
|
// Indirect accessors
|
||||||
CharUnits getIndirectAlign() const {
|
CharUnits getIndirectAlign() const {
|
||||||
assert(isIndirect() && "Invalid kind!");
|
assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
|
||||||
return CharUnits::fromQuantity(IndirectAlign);
|
return CharUnits::fromQuantity(IndirectAlign);
|
||||||
}
|
}
|
||||||
void setIndirectAlign(CharUnits IA) {
|
void setIndirectAlign(CharUnits IA) {
|
||||||
assert(isIndirect() && "Invalid kind!");
|
assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
|
||||||
IndirectAlign = IA.getQuantity();
|
IndirectAlign = IA.getQuantity();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,12 +385,22 @@ public:
|
||||||
IndirectByVal = IBV;
|
IndirectByVal = IBV;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned getIndirectAddrSpace() const {
|
||||||
|
assert(isIndirectAliased() && "Invalid kind!");
|
||||||
|
return IndirectAddrSpace;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setIndirectAddrSpace(unsigned AddrSpace) {
|
||||||
|
assert(isIndirectAliased() && "Invalid kind!");
|
||||||
|
IndirectAddrSpace = AddrSpace;
|
||||||
|
}
|
||||||
|
|
||||||
bool getIndirectRealign() const {
|
bool getIndirectRealign() const {
|
||||||
assert(isIndirect() && "Invalid kind!");
|
assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
|
||||||
return IndirectRealign;
|
return IndirectRealign;
|
||||||
}
|
}
|
||||||
void setIndirectRealign(bool IR) {
|
void setIndirectRealign(bool IR) {
|
||||||
assert(isIndirect() && "Invalid kind!");
|
assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
|
||||||
IndirectRealign = IR;
|
IndirectRealign = IR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1470,6 +1470,7 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ABIArgInfo::Indirect:
|
case ABIArgInfo::Indirect:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
IRArgs.NumberOfArgs = 1;
|
IRArgs.NumberOfArgs = 1;
|
||||||
break;
|
break;
|
||||||
case ABIArgInfo::Ignore:
|
case ABIArgInfo::Ignore:
|
||||||
|
@ -1560,6 +1561,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
|
||||||
const ABIArgInfo &retAI = FI.getReturnInfo();
|
const ABIArgInfo &retAI = FI.getReturnInfo();
|
||||||
switch (retAI.getKind()) {
|
switch (retAI.getKind()) {
|
||||||
case ABIArgInfo::Expand:
|
case ABIArgInfo::Expand:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
llvm_unreachable("Invalid ABI kind for return argument");
|
llvm_unreachable("Invalid ABI kind for return argument");
|
||||||
|
|
||||||
case ABIArgInfo::Extend:
|
case ABIArgInfo::Extend:
|
||||||
|
@ -1637,7 +1639,12 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
|
||||||
CGM.getDataLayout().getAllocaAddrSpace());
|
CGM.getDataLayout().getAllocaAddrSpace());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case ABIArgInfo::IndirectAliased: {
|
||||||
|
assert(NumIRArgs == 1);
|
||||||
|
llvm::Type *LTy = ConvertTypeForMem(it->type);
|
||||||
|
ArgTypes[FirstIRArg] = LTy->getPointerTo(ArgInfo.getIndirectAddrSpace());
|
||||||
|
break;
|
||||||
|
}
|
||||||
case ABIArgInfo::Extend:
|
case ABIArgInfo::Extend:
|
||||||
case ABIArgInfo::Direct: {
|
case ABIArgInfo::Direct: {
|
||||||
// Fast-isel and the optimizer generally like scalar values better than
|
// Fast-isel and the optimizer generally like scalar values better than
|
||||||
|
@ -2101,6 +2108,7 @@ void CodeGenModule::ConstructAttributeList(
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ABIArgInfo::Expand:
|
case ABIArgInfo::Expand:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
llvm_unreachable("Invalid ABI kind for return argument");
|
llvm_unreachable("Invalid ABI kind for return argument");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2184,6 +2192,9 @@ void CodeGenModule::ConstructAttributeList(
|
||||||
if (AI.getIndirectByVal())
|
if (AI.getIndirectByVal())
|
||||||
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
|
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
|
||||||
|
|
||||||
|
// TODO: We could add the byref attribute if not byval, but it would
|
||||||
|
// require updating many testcases.
|
||||||
|
|
||||||
CharUnits Align = AI.getIndirectAlign();
|
CharUnits Align = AI.getIndirectAlign();
|
||||||
|
|
||||||
// In a byval argument, it is important that the required
|
// In a byval argument, it is important that the required
|
||||||
|
@ -2206,6 +2217,13 @@ void CodeGenModule::ConstructAttributeList(
|
||||||
// byval disables readnone and readonly.
|
// byval disables readnone and readonly.
|
||||||
FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
|
FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
|
||||||
.removeAttribute(llvm::Attribute::ReadNone);
|
.removeAttribute(llvm::Attribute::ReadNone);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ABIArgInfo::IndirectAliased: {
|
||||||
|
CharUnits Align = AI.getIndirectAlign();
|
||||||
|
Attrs.addByRefAttr(getTypes().ConvertTypeForMem(ParamType));
|
||||||
|
Attrs.addAlignmentAttr(Align.getQuantity());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ABIArgInfo::Ignore:
|
case ABIArgInfo::Ignore:
|
||||||
|
@ -2434,16 +2452,19 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Indirect: {
|
case ABIArgInfo::Indirect:
|
||||||
|
case ABIArgInfo::IndirectAliased: {
|
||||||
assert(NumIRArgs == 1);
|
assert(NumIRArgs == 1);
|
||||||
Address ParamAddr =
|
Address ParamAddr =
|
||||||
Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign());
|
Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign());
|
||||||
|
|
||||||
if (!hasScalarEvaluationKind(Ty)) {
|
if (!hasScalarEvaluationKind(Ty)) {
|
||||||
// Aggregates and complex variables are accessed by reference. All we
|
// Aggregates and complex variables are accessed by reference. All we
|
||||||
// need to do is realign the value, if requested.
|
// need to do is realign the value, if requested. Also, if the address
|
||||||
|
// may be aliased, copy it to ensure that the parameter variable is
|
||||||
|
// mutable and has a unique adress, as C requires.
|
||||||
Address V = ParamAddr;
|
Address V = ParamAddr;
|
||||||
if (ArgI.getIndirectRealign()) {
|
if (ArgI.getIndirectRealign() || ArgI.isIndirectAliased()) {
|
||||||
Address AlignedTemp = CreateMemTemp(Ty, "coerce");
|
Address AlignedTemp = CreateMemTemp(Ty, "coerce");
|
||||||
|
|
||||||
// Copy from the incoming argument pointer to the temporary with the
|
// Copy from the incoming argument pointer to the temporary with the
|
||||||
|
@ -3285,8 +3306,8 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Expand:
|
case ABIArgInfo::Expand:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
llvm_unreachable("Invalid ABI kind for return argument");
|
llvm_unreachable("Invalid ABI kind for return argument");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4413,7 +4434,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Indirect: {
|
case ABIArgInfo::Indirect:
|
||||||
|
case ABIArgInfo::IndirectAliased: {
|
||||||
assert(NumIRArgs == 1);
|
assert(NumIRArgs == 1);
|
||||||
if (!I->isAggregate()) {
|
if (!I->isAggregate()) {
|
||||||
// Make a temporary alloca to pass the argument.
|
// Make a temporary alloca to pass the argument.
|
||||||
|
@ -4668,12 +4690,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Expand:
|
case ABIArgInfo::Expand: {
|
||||||
unsigned IRArgPos = FirstIRArg;
|
unsigned IRArgPos = FirstIRArg;
|
||||||
ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
|
ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
|
||||||
assert(IRArgPos == FirstIRArg + NumIRArgs);
|
assert(IRArgPos == FirstIRArg + NumIRArgs);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
|
const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
|
||||||
|
@ -5084,6 +5107,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Expand:
|
case ABIArgInfo::Expand:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
llvm_unreachable("Invalid ABI kind for return argument");
|
llvm_unreachable("Invalid ABI kind for return argument");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -257,6 +257,11 @@ LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
|
||||||
<< " ByVal=" << getIndirectByVal()
|
<< " ByVal=" << getIndirectByVal()
|
||||||
<< " Realign=" << getIndirectRealign();
|
<< " Realign=" << getIndirectRealign();
|
||||||
break;
|
break;
|
||||||
|
case IndirectAliased:
|
||||||
|
OS << "Indirect Align=" << getIndirectAlign().getQuantity()
|
||||||
|
<< " AadrSpace=" << getIndirectAddrSpace()
|
||||||
|
<< " Realign=" << getIndirectRealign();
|
||||||
|
break;
|
||||||
case Expand:
|
case Expand:
|
||||||
OS << "Expand";
|
OS << "Expand";
|
||||||
break;
|
break;
|
||||||
|
@ -1989,6 +1994,7 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
|
||||||
case ABIArgInfo::InAlloca:
|
case ABIArgInfo::InAlloca:
|
||||||
return true;
|
return true;
|
||||||
case ABIArgInfo::Ignore:
|
case ABIArgInfo::Ignore:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
return false;
|
return false;
|
||||||
case ABIArgInfo::Indirect:
|
case ABIArgInfo::Indirect:
|
||||||
case ABIArgInfo::Direct:
|
case ABIArgInfo::Direct:
|
||||||
|
@ -8790,18 +8796,31 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
|
||||||
|
|
||||||
// TODO: Can we omit empty structs?
|
// TODO: Can we omit empty structs?
|
||||||
|
|
||||||
llvm::Type *LTy = nullptr;
|
|
||||||
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
|
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
|
||||||
LTy = CGT.ConvertType(QualType(SeltTy, 0));
|
Ty = QualType(SeltTy, 0);
|
||||||
|
|
||||||
|
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
|
||||||
|
llvm::Type *LTy = OrigLTy;
|
||||||
if (getContext().getLangOpts().HIP) {
|
if (getContext().getLangOpts().HIP) {
|
||||||
if (!LTy)
|
|
||||||
LTy = CGT.ConvertType(Ty);
|
|
||||||
LTy = coerceKernelArgumentType(
|
LTy = coerceKernelArgumentType(
|
||||||
LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
|
OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
|
||||||
/*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
|
/*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Should also use this for OpenCL, but it requires addressing the
|
||||||
|
// problem of kernels being called.
|
||||||
|
//
|
||||||
|
// FIXME: This doesn't apply the optimization of coercing pointers in structs
|
||||||
|
// to global address space when using byref. This would require implementing a
|
||||||
|
// new kind of coercion of the in-memory type when for indirect arguments.
|
||||||
|
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
|
||||||
|
isAggregateTypeForABI(Ty)) {
|
||||||
|
return ABIArgInfo::getIndirectAliased(
|
||||||
|
getContext().getTypeAlignInChars(Ty),
|
||||||
|
getContext().getTargetAddressSpace(LangAS::opencl_constant),
|
||||||
|
false /*Realign*/, nullptr /*Padding*/);
|
||||||
|
}
|
||||||
|
|
||||||
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
|
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
|
||||||
// individual elements, which confuses the Clover OpenCL backend; therefore we
|
// individual elements, which confuses the Clover OpenCL backend; therefore we
|
||||||
// have to set it to false here. Other args of getDirect() are just defaults.
|
// have to set it to false here. Other args of getDirect() are just defaults.
|
||||||
|
@ -9377,6 +9396,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
||||||
}
|
}
|
||||||
|
|
||||||
case ABIArgInfo::Indirect:
|
case ABIArgInfo::Indirect:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
Stride = SlotSize;
|
Stride = SlotSize;
|
||||||
ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect");
|
ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect");
|
||||||
ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"),
|
ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"),
|
||||||
|
@ -9742,6 +9762,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
||||||
ArgSize = ArgSize.alignTo(SlotSize);
|
ArgSize = ArgSize.alignTo(SlotSize);
|
||||||
break;
|
break;
|
||||||
case ABIArgInfo::Indirect:
|
case ABIArgInfo::Indirect:
|
||||||
|
case ABIArgInfo::IndirectAliased:
|
||||||
Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
|
Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
|
||||||
Val = Address(Builder.CreateLoad(Val), TypeAlign);
|
Val = Address(Builder.CreateLoad(Val), TypeAlign);
|
||||||
ArgSize = SlotSize;
|
ArgSize = SlotSize;
|
||||||
|
|
|
@ -8,14 +8,14 @@ struct A {
|
||||||
int a[32];
|
int a[32];
|
||||||
};
|
};
|
||||||
|
|
||||||
// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
|
// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
|
||||||
// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
|
// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
|
||||||
__global__ void kernel(A x) {
|
__global__ void kernel(A x) {
|
||||||
}
|
}
|
||||||
|
|
||||||
class Kernel {
|
class Kernel {
|
||||||
public:
|
public:
|
||||||
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
|
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}})
|
||||||
// NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
|
// NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
|
||||||
static __global__ void memberKernel(A x){}
|
static __global__ void memberKernel(A x){}
|
||||||
template<typename T> static __global__ void templateMemberKernel(T x) {}
|
template<typename T> static __global__ void templateMemberKernel(T x) {}
|
||||||
|
@ -29,11 +29,11 @@ void launch(void*);
|
||||||
|
|
||||||
void test() {
|
void test() {
|
||||||
Kernel K;
|
Kernel K;
|
||||||
// AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
|
// AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
|
||||||
// NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
|
// NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
|
||||||
launch((void*)templateKernel<A>);
|
launch((void*)templateKernel<A>);
|
||||||
|
|
||||||
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
|
// AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}
|
||||||
// NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
|
// NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
|
||||||
launch((void*)Kernel::templateMemberKernel<A>);
|
launch((void*)Kernel::templateMemberKernel<A>);
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,7 +67,6 @@ typedef struct struct_of_structs_arg
|
||||||
int i2;
|
int i2;
|
||||||
} struct_of_structs_arg_t;
|
} struct_of_structs_arg_t;
|
||||||
|
|
||||||
// CHECK: %union.transparent_u = type { i32 }
|
|
||||||
typedef union
|
typedef union
|
||||||
{
|
{
|
||||||
int b1;
|
int b1;
|
||||||
|
@ -237,7 +236,7 @@ __kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
|
||||||
// CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
|
// CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
|
||||||
__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
|
__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
|
||||||
|
|
||||||
// CHECK: void @test_kernel_transparent_union_arg(%union.transparent_u %u.coerce)
|
// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
|
||||||
__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
|
__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
|
||||||
|
|
||||||
// CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
|
// CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
|
||||||
|
|
Loading…
Reference in New Issue