[Alignment][NFC] Migrate AMDGPU backend to Align
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Differential Revision: https://reviews.llvm.org/D82743
parent 368a5e3a66
commit 52911428ef
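Every hunk below applies the same basic transformation: a raw unsigned alignment (where 0 meant "fall back to the ABI type alignment") becomes an llvm::Align, which is guaranteed to be a non-zero power of two, with MaybeAlign covering the "not specified" case. As a rough illustrative sketch only (the helper names below are hypothetical and not code from this patch; it assumes the LLVM C++ API as it stood around this commit), the before/after idiom looks like this:

    // Illustrative sketch, not part of the patch. Helper names are made up;
    // the Align/MaybeAlign/DataLayout calls are the ones the diff itself uses.
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/Support/Alignment.h"

    using namespace llvm;

    // Old idiom: a raw 'unsigned', with 0 meaning "use the ABI type alignment".
    static unsigned getLDSAlignmentOld(const DataLayout &DL,
                                       const GlobalVariable &GV) {
      unsigned Alignment = GV.getAlignment();
      if (Alignment == 0)
        Alignment = DL.getABITypeAlignment(GV.getValueType());
      return Alignment;
    }

    // New idiom: MaybeAlign models the "unspecified" case and Align is a
    // non-zero power of two, so the explicit zero check disappears.
    static Align getLDSAlignmentNew(const DataLayout &DL,
                                    const GlobalVariable &GV) {
      return DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
    }

Interfaces that still need a plain integer (for example the MC streamer printing the .amdgpu_lds directive, or declareCommon) take it via Alignment.value(), while offset arithmetic goes through the Align-aware alignTo overload.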
@@ -315,14 +315,12 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
     const DataLayout &DL = GV->getParent()->getDataLayout();
     uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
-    unsigned Align = GV->getAlignment();
-    if (!Align)
-      Align = 4;
+    Align Alignment = GV->getAlign().getValueOr(Align(4));

     emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
     emitLinkage(GV, GVSym);
     if (auto TS = getTargetStreamer())
-      TS->emitAMDGPULDS(GVSym, Size, Align);
+      TS->emitAMDGPULDS(GVSym, Size, Alignment);
     return;
   }

@@ -317,10 +317,9 @@ bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
   Type *Ty = I.getType();
   const DataLayout &DL = Mod->getDataLayout();
   int TySize = DL.getTypeSizeInBits(Ty);
-  unsigned Align = I.getAlignment() ?
-                   I.getAlignment() : DL.getABITypeAlignment(Ty);
+  Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);

-  return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
+  return I.isSimple() && TySize < 32 && Alignment >= 4 && DA->isUniform(&I);
 }

 bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -1010,7 +1010,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
   const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
   CallingConv::ID CC = Fn.getCallingConv();

-  unsigned MaxAlign = 1;
+  Align MaxAlign = Align(1);
   uint64_t ExplicitArgOffset = 0;
   const DataLayout &DL = Fn.getParent()->getDataLayout();

@@ -1018,12 +1018,12 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(

   for (const Argument &Arg : Fn.args()) {
     Type *BaseArgTy = Arg.getType();
-    unsigned Align = DL.getABITypeAlignment(BaseArgTy);
-    MaxAlign = std::max(Align, MaxAlign);
+    Align Alignment = DL.getABITypeAlign(BaseArgTy);
+    MaxAlign = std::max(Alignment, MaxAlign);
     unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy);

-    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Align) + ExplicitOffset;
-    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
+    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
+    ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;

     // We're basically throwing away everything passed into us and starting over
     // to get accurate in-memory offsets. The "PartOffset" is completely useless
@@ -2931,16 +2931,17 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
   EVT VT = LN->getMemoryVT();

   unsigned Size = VT.getStoreSize();
-  unsigned Align = LN->getAlignment();
-  if (Align < Size && isTypeLegal(VT)) {
+  Align Alignment = LN->getAlign();
+  if (Alignment < Size && isTypeLegal(VT)) {
     bool IsFast;
     unsigned AS = LN->getAddressSpace();

     // Expand unaligned loads earlier than legalization. Due to visitation order
     // problems during legalization, the emitted instructions to pack and unpack
     // the bytes again are not eliminated in the case of an unaligned copy.
-    if (!allowsMisalignedMemoryAccesses(
-            VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
+    if (!allowsMisalignedMemoryAccesses(VT, AS, Alignment.value(),
+                                        LN->getMemOperand()->getFlags(),
+                                        &IsFast)) {
       SDValue Ops[2];

       if (VT.isVector())
@@ -2985,8 +2986,8 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,

   SDLoc SL(N);
   SelectionDAG &DAG = DCI.DAG;
-  unsigned Align = SN->getAlignment();
-  if (Align < Size && isTypeLegal(VT)) {
+  Align Alignment = SN->getAlign();
+  if (Alignment < Size && isTypeLegal(VT)) {
     bool IsFast;
     unsigned AS = SN->getAddressSpace();

@@ -2994,8 +2995,9 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
     // order problems during legalization, the emitted instructions to pack and
     // unpack the bytes again are not eliminated in the case of an unaligned
     // copy.
-    if (!allowsMisalignedMemoryAccesses(
-            VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) {
+    if (!allowsMisalignedMemoryAccesses(VT, AS, Alignment.value(),
+                                        SN->getMemOperand()->getFlags(),
+                                        &IsFast)) {
       if (VT.isVector())
         return scalarizeVectorStore(SN, DAG);

@@ -43,14 +43,13 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
   if (!Entry.second)
     return Entry.first->second;

-  unsigned Align = GV.getAlignment();
-  if (Align == 0)
-    Align = DL.getABITypeAlignment(GV.getValueType());
+  Align Alignment =
+      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

   /// TODO: We should sort these to minimize wasted space due to alignment
   /// padding. Currently the padding is decided by the first encountered use
   /// during lowering.
-  unsigned Offset = LDSSize = alignTo(LDSSize, Align);
+  unsigned Offset = LDSSize = alignTo(LDSSize, Alignment);

   Entry.first->second = Offset;
   LDSSize += DL.getTypeAllocSize(GV.getValueType());

@@ -736,16 +736,15 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
         continue;

       if (Use->getParent()->getParent() == &F) {
-        unsigned Align = GV.getAlignment();
-        if (Align == 0)
-          Align = DL.getABITypeAlignment(GV.getValueType());
+        Align Alignment =
+            DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

         // FIXME: Try to account for padding here. The padding is currently
         // determined from the inverse order of uses in the function. I'm not
         // sure if the use list order is in any way connected to this, so the
         // total reported size is likely incorrect.
         uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
-        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
+        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alignment);
         CurrentLocalMemUsage += AllocSize;
         break;
       }
@@ -837,9 +836,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
   unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

-  unsigned Align = I.getAlignment();
-  if (Align == 0)
-    Align = DL.getABITypeAlignment(I.getAllocatedType());
+  Align Alignment =
+      DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());

   // FIXME: This computed padding is likely wrong since it depends on inverse
   // usage order.
@@ -847,7 +845,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
   // FIXME: It is also possible that if we're allowed to use all of the memory
   // could could end up using more than the maximum due to alignment padding.

-  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
+  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
   uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
   NewSize += AllocSize;

@@ -1303,18 +1303,16 @@ static Register getSrcRegIgnoringCopies(const MachineRegisterInfo &MRI,
 // the three offsets (voffset, soffset and instoffset)
 static unsigned setBufferOffsets(MachineIRBuilder &B,
                                  const AMDGPURegisterBankInfo &RBI,
-                                 Register CombinedOffset,
-                                 Register &VOffsetReg,
-                                 Register &SOffsetReg,
-                                 int64_t &InstOffsetVal,
-                                 unsigned Align) {
+                                 Register CombinedOffset, Register &VOffsetReg,
+                                 Register &SOffsetReg, int64_t &InstOffsetVal,
+                                 Align Alignment) {
   const LLT S32 = LLT::scalar(32);
   MachineRegisterInfo *MRI = B.getMRI();

   if (Optional<int64_t> Imm = getConstantVRegVal(CombinedOffset, *MRI)) {
     uint32_t SOffset, ImmOffset;
-    if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset,
-                                 &RBI.Subtarget, Align)) {
+    if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
+                                 Alignment)) {
       VOffsetReg = B.buildConstant(S32, 0).getReg(0);
       SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
       InstOffsetVal = ImmOffset;
@@ -1334,7 +1332,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,

   uint32_t SOffset, ImmOffset;
   if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
-                                             &RBI.Subtarget, Align)) {
+                                             &RBI.Subtarget, Alignment)) {
     if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
       VOffsetReg = Base;
       SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
@@ -1417,7 +1415,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(

   // Use the alignment to ensure that the required offsets will fit into the
   // immediate offsets.
-  const unsigned Alignment = NumLoads > 1 ? 16 * NumLoads : 1;
+  const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);

   MachineIRBuilder B(MI);
   MachineFunction &MF = B.getMF();

@@ -4460,19 +4460,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
   if (Size > LocalMemorySize)
     return Error(SizeLoc, "size is too large");

-  int64_t Align = 4;
+  int64_t Alignment = 4;
   if (getLexer().is(AsmToken::Comma)) {
     Lex();
     SMLoc AlignLoc = getLexer().getLoc();
-    if (getParser().parseAbsoluteExpression(Align))
+    if (getParser().parseAbsoluteExpression(Alignment))
       return true;
-    if (Align < 0 || !isPowerOf2_64(Align))
+    if (Alignment < 0 || !isPowerOf2_64(Alignment))
       return Error(AlignLoc, "alignment must be a power of two");

     // Alignment larger than the size of LDS is possible in theory, as long
     // as the linker manages to place to symbol at address 0, but we do want
     // to make sure the alignment fits nicely into a 32-bit integer.
-    if (Align >= 1u << 31)
+    if (Alignment >= 1u << 31)
       return Error(AlignLoc, "alignment is too large");
   }

@@ -4484,7 +4484,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
   if (!Symbol->isUndefined())
     return Error(NameLoc, "invalid symbol redefinition");

-  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
+  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
   return false;
 }

@@ -212,9 +212,9 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
 }

 void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
-                                            unsigned Align) {
-  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
-     << '\n';
+                                            Align Alignment) {
+  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
+     << Alignment.value() << '\n';
 }

 bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
@@ -515,9 +515,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
 }

 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
-                                            unsigned Align) {
-  assert(isPowerOf2_32(Align));
-
+                                            Align Alignment) {
   MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
   SymbolELF->setType(ELF::STT_OBJECT);

@@ -526,7 +524,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
     SymbolELF->setExternal(true);
   }

-  if (SymbolELF->declareCommon(Size, Align, true)) {
+  if (SymbolELF->declareCommon(Size, Alignment.value(), true)) {
     report_fatal_error("Symbol: " + Symbol->getName() +
                        " redeclared as different type");
   }
@@ -54,7 +54,7 @@ public:
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;

   virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
-                             unsigned Align) = 0;
+                             Align Alignment) = 0;

   /// \returns True on success, false on failure.
   virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
@@ -110,7 +110,7 @@ public:

   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

-  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;

   /// \returns True on success, false on failure.
   bool EmitISAVersion(StringRef IsaVersionString) override;
@@ -158,7 +158,7 @@ public:

   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

-  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+  void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;

   /// \returns True on success, false on failure.
   bool EmitISAVersion(StringRef IsaVersionString) override;
@@ -1265,10 +1265,11 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     return scalarizeVectorStore(StoreNode, DAG);
   }

-  unsigned Align = StoreNode->getAlignment();
-  if (Align < MemVT.getStoreSize() &&
-      !allowsMisalignedMemoryAccesses(
-          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
+  Align Alignment = StoreNode->getAlign();
+  if (Alignment < MemVT.getStoreSize() &&
+      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment.value(),
+                                      StoreNode->getMemOperand()->getFlags(),
+                                      nullptr)) {
     return expandUnalignedStore(StoreNode, DAG);
   }

@@ -1578,16 +1578,15 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
 }

 SDValue SITargetLowering::lowerKernargMemParameter(
-    SelectionDAG &DAG, EVT VT, EVT MemVT,
-    const SDLoc &SL, SDValue Chain,
-    uint64_t Offset, unsigned Align, bool Signed,
-    const ISD::InputArg *Arg) const {
+    SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
+    uint64_t Offset, Align Alignment, bool Signed,
+    const ISD::InputArg *Arg) const {
   MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

   // Try to avoid using an extload by loading earlier than the argument address,
   // and extracting the relevant bits. The load should hopefully be merged with
   // the previous argument.
-  if (MemVT.getStoreSize() < 4 && Align < 4) {
+  if (MemVT.getStoreSize() < 4 && Alignment < 4) {
     // TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
     int64_t AlignDownOffset = alignDown(Offset, 4);
     int64_t OffsetDiff = Offset - AlignDownOffset;
@@ -1613,9 +1612,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
   }

   SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
-  SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+  SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
                              MachineMemOperand::MODereferenceable |
                                  MachineMemOperand::MOInvariant);

   SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
   return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
@@ -2233,9 +2232,9 @@ SDValue SITargetLowering::LowerFormalArguments(
   //
   // FIXME: Alignment of explicit arguments totally broken with non-0 explicit
   // kern arg offset.
-  const unsigned KernelArgBaseAlign = 16;
+  const Align KernelArgBaseAlign = Align(16);

   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
     const ISD::InputArg &Arg = Ins[i];
     if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -2250,10 +2249,11 @@ SDValue SITargetLowering::LowerFormalArguments(
       EVT MemVT = VA.getLocVT();

       const uint64_t Offset = VA.getLocMemOffset();
-      unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
+      Align Alignment = commonAlignment(KernelArgBaseAlign, Offset);

-      SDValue Arg = lowerKernargMemParameter(
-        DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
+      SDValue Arg =
+          lowerKernargMemParameter(DAG, VT, MemVT, DL, Chain, Offset, Alignment,
+                                   Ins[i].Flags.isSExt(), &Ins[i]);
       Chains.push_back(Arg.getValue(1));

       auto *ParamTy =
@@ -3127,7 +3127,7 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
   SDValue Size = Tmp2.getOperand(1);
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Chain = SP.getValue(1);
-  unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+  MaybeAlign Alignment(cast<ConstantSDNode>(Tmp3)->getZExtValue());
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const TargetFrameLowering *TFL = ST.getFrameLowering();
   unsigned Opc =
@@ -3138,12 +3138,13 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
       ISD::SHL, dl, VT, Size,
       DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));

-  unsigned StackAlign = TFL->getStackAlignment();
+  Align StackAlign = TFL->getStackAlign();
   Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
-  if (Align > StackAlign) {
-    Tmp1 = DAG.getNode(
-        ISD::AND, dl, VT, Tmp1,
-        DAG.getConstant(-(uint64_t)Align << ST.getWavefrontSizeLog2(), dl, VT));
+  if (Alignment && *Alignment > StackAlign) {
+    Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+                       DAG.getConstant(-(uint64_t)Alignment->value()
+                                           << ST.getWavefrontSizeLog2(),
+                                       dl, VT));
   }

   Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
@@ -5538,11 +5539,11 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
   Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
   PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
   const DataLayout &DataLayout = DAG.getDataLayout();
-  unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+  Align Alignment = DataLayout.getABITypeAlign(PtrTy);
   MachinePointerInfo PtrInfo
     = MachinePointerInfo::getGOT(DAG.getMachineFunction());

-  return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
+  return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Alignment,
                      MachineMemOperand::MODereferenceable |
                          MachineMemOperand::MOInvariant);
 }
@@ -5568,8 +5569,8 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
                                                  MVT VT,
                                                  unsigned Offset) const {
   SDLoc SL(Op);
-  SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
-                                           DAG.getEntryNode(), Offset, 4, false);
+  SDValue Param = lowerKernargMemParameter(
+      DAG, MVT::i32, MVT::i32, SL, DAG.getEntryNode(), Offset, Align(4), false);
   // The local size values will have the hi 16-bits as zero.
   return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
                      DAG.getValueType(VT));
@@ -6203,7 +6204,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,

   // Use the alignment to ensure that the required offsets will fit into the
   // immediate offsets.
-  setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
+  setBufferOffsets(Offset, DAG, &Ops[3],
+                   NumLoads > 1 ? Align(16 * NumLoads) : Align(4));

   uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
   for (unsigned i = 0; i < NumLoads; ++i) {
@@ -6299,37 +6301,43 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::NGROUPS_X, 4, false);
+                                    SI::KernelInputOffsets::NGROUPS_X, Align(4),
+                                    false);
   case Intrinsic::r600_read_ngroups_y:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::NGROUPS_Y, 4, false);
+                                    SI::KernelInputOffsets::NGROUPS_Y, Align(4),
+                                    false);
   case Intrinsic::r600_read_ngroups_z:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::NGROUPS_Z, 4, false);
+                                    SI::KernelInputOffsets::NGROUPS_Z, Align(4),
+                                    false);
   case Intrinsic::r600_read_global_size_x:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::GLOBAL_SIZE_X, 4, false);
+                                    SI::KernelInputOffsets::GLOBAL_SIZE_X,
+                                    Align(4), false);
   case Intrinsic::r600_read_global_size_y:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::GLOBAL_SIZE_Y, 4, false);
+                                    SI::KernelInputOffsets::GLOBAL_SIZE_Y,
+                                    Align(4), false);
   case Intrinsic::r600_read_global_size_z:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);

     return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
-                                    SI::KernelInputOffsets::GLOBAL_SIZE_Z, 4, false);
+                                    SI::KernelInputOffsets::GLOBAL_SIZE_Z,
+                                    Align(4), false);
   case Intrinsic::r600_read_local_size_x:
     if (Subtarget->isAmdHsaOS())
       return emitNonHSAIntrinsicError(DAG, DL, VT);
@@ -7618,13 +7626,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
 // three offsets (voffset, soffset and instoffset) into the SDValue[3] array
 // pointed to by Offsets.
 unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                             SelectionDAG &DAG, SDValue *Offsets,
-                                            unsigned Align) const {
+                                            Align Alignment) const {
   SDLoc DL(CombinedOffset);
   if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
     uint32_t Imm = C->getZExtValue();
     uint32_t SOffset, ImmOffset;
-    if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
+    if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
+                                 Alignment)) {
       Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
       Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -7637,7 +7646,7 @@ unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
     uint32_t SOffset, ImmOffset;
     int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
     if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
-                                                Subtarget, Align)) {
+                                                Subtarget, Alignment)) {
       Offsets[0] = N0;
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
       Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -42,7 +42,8 @@ private:
   SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
   SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                    const SDLoc &SL, SDValue Chain,
-                                   uint64_t Offset, unsigned Align, bool Signed,
+                                   uint64_t Offset, Align Alignment,
+                                   bool Signed,
                                    const ISD::InputArg *Arg = nullptr) const;

   SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
@@ -216,7 +217,7 @@ private:
   /// \returns 0 If there is a non-constant offset or if the offset is 0.
   /// Otherwise returns the constant offset.
   unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
-                            SDValue *Offsets, unsigned Align = 4) const;
+                            SDValue *Offsets, Align Alignment = Align(4)) const;

   // Handle 8 bit and 16 bit buffer loads
   SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
@@ -1377,8 +1377,8 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
 // aligned if they are aligned to begin with. It also ensures that additional
 // offsets within the given alignment can be added to the resulting ImmOffset.
 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
-                      const GCNSubtarget *Subtarget, uint32_t Align) {
-  const uint32_t MaxImm = alignDown(4095, Align);
+                      const GCNSubtarget *Subtarget, Align Alignment) {
+  const uint32_t MaxImm = alignDown(4095, Alignment.value());
   uint32_t Overflow = 0;

   if (Imm > MaxImm) {
@@ -1396,10 +1396,10 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
       //
       // Atomic operations fail to work correctly when individual address
       // components are unaligned, even if their sum is aligned.
-      uint32_t High = (Imm + Align) & ~4095;
-      uint32_t Low = (Imm + Align) & 4095;
+      uint32_t High = (Imm + Alignment.value()) & ~4095;
+      uint32_t Low = (Imm + Alignment.value()) & 4095;
       Imm = Low;
-      Overflow = High - Align;
+      Overflow = High - Alignment.value();
     }
   }

@@ -15,6 +15,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetParser.h"
@@ -692,7 +693,8 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
-                      const GCNSubtarget *Subtarget, uint32_t Align = 4);
+                      const GCNSubtarget *Subtarget,
+                      Align Alignment = Align(4));

 /// \returns true if the intrinsic is divergent
 bool isIntrinsicSourceOfDivergence(unsigned IntrID);