forked from OSchip/llvm-project
[AMDGPU][NFC] Fix typos
Differential Revision: https://reviews.llvm.org/D113672
commit d1f45ed58f (parent 5dfe60d171)
@@ -415,7 +415,7 @@ def FeatureDPP : SubtargetFeature<"dpp",
   "Support DPP (Data Parallel Primitives) extension"
 >;
 
-// DPP8 allows arbitrary cross-lane swizzling withing groups of 8 lanes.
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
 def FeatureDPP8 : SubtargetFeature<"dpp8",
   "HasDPP8",
   "true",
@@ -76,8 +76,8 @@ struct ImageDimIntrinsicInfo {
 };
 const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
 
-const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode,
-                                                               unsigned Dim);
+const ImageDimIntrinsicInfo *
+getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim);
 
 } // end AMDGPU namespace
 } // End llvm namespace
@@ -4408,8 +4408,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
       if (ConstantLod->isZero() || ConstantLod->isNegative()) {
         // Set new opcode to _lz variant of _l, and change the intrinsic ID.
         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
-            AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ,
-                                                      Intr->Dim);
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+                                                     Intr->Dim);
 
         // The starting indexes should remain in the same place.
         --CorrectedNumVAddrs;
@@ -109,8 +109,8 @@ public:
                                    Register Den) const;
 
   void legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B, Register DstDivReg,
-                                     Register DstRemReg, Register Numer,
-                                     Register Denom) const;
+                                     Register DstRemReg, Register Num,
+                                     Register Den) const;
 
   bool legalizeSignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B) const;
@@ -125,7 +125,7 @@ private:
   BasicBlock::iterator getEntryIns(CallInst * UI);
   // Insert an Alloc instruction.
   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
-  // Get a scalar native builtin signle argument FP function
+  // Get a scalar native builtin single argument FP function
   FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
 
 protected:
@@ -455,7 +455,8 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() {
       break;
     }
 
-    default: llvm_unreachable("Unhandeled param rule");
+    default:
+      llvm_unreachable("Unhandled param rule");
     }
   }
   ++Index;
@@ -747,7 +748,8 @@ static const char *getItaniumTypeName(AMDGPULibFunc::EType T) {
   case AMDGPULibFunc::IMG3D: return "11ocl_image3d";
   case AMDGPULibFunc::SAMPLER: return "11ocl_sampler";
   case AMDGPULibFunc::EVENT: return "9ocl_event";
-  default: llvm_unreachable("Unhandeled param type");
+  default:
+    llvm_unreachable("Unhandled param type");
   }
   return nullptr;
 }
@@ -761,7 +763,7 @@ namespace {
 // substitution candidates from the grammar, but are explicitly excluded:
 // 1. <builtin-type> other than vendor extended types ..."
 
-// For the purpose of functions the following productions make sence for the
+// For the purpose of functions the following productions make sense for the
 // substitution:
 //   <type> ::= <builtin-type>
 //          ::= <class-enum-type>
@@ -774,8 +776,8 @@ namespace {
 // using <class-enum-type> production rule they're not used for substitution
 // because clang consider them as builtin types.
 //
-// DvNN_ type is GCC extension for vectors and is a subject for the substitution.
+// DvNN_ type is GCC extension for vectors and is a subject for the
+// substitution.
 
 class ItaniumMangler {
   SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
@@ -902,7 +904,7 @@ static Type* getIntrinsicParamType(
   case AMDGPULibFunc::EVENT:
     T = StructType::create(C,"ocl_event")->getPointerTo(); break;
   default:
-    llvm_unreachable("Unhandeled param type");
+    llvm_unreachable("Unhandled param type");
     return nullptr;
   }
   if (P.VectorSize > 1)
@@ -13,7 +13,7 @@
 //
 // In LLVM CodeGen the runtime-handle metadata will be translated to
 // RuntimeHandle metadata in code object. Runtime allocates a global buffer
-// for each kernel with RuntimeHandel metadata and saves the kernel address
+// for each kernel with RuntimeHandle metadata and saves the kernel address
 // required for the AQL packet into the buffer. __enqueue_kernel function
 // in device library knows that the invoke function pointer in the block
 // literal is actually runtime handle and loads the kernel address from it
@@ -274,7 +274,7 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
 
   // We could do a single 64-bit load here, but it's likely that the basic
   // 32-bit and extract sequence is already present, and it is probably easier
-  // to CSE this. The loads should be mergable later anyway.
+  // to CSE this. The loads should be mergeable later anyway.
   Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
   LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
 
@@ -251,7 +251,7 @@ public:
   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
 
-  /// \returns Corresponsing DWARF register number mapping flavour for the
+  /// \returns Corresponding DWARF register number mapping flavour for the
   /// \p WavefrontSize.
   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
 
@@ -1545,7 +1545,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
 }
 
 int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
-  // On gfx90a+ releveant hazards are checked in checkMAIVALUHazards()
+  // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
   if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
     return 0;
 
@@ -188,7 +188,7 @@ public:
                printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
     Sch.BaseClass::schedule();
 
-    // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
+    // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
     Sch.RegionEnd = Rgn.End;
     //assert(Rgn.End == Sch.RegionEnd);
     Rgn.Begin = Sch.RegionBegin;
@@ -280,7 +280,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
   return RPTracker.moveMaxPressure();
 }
 
-void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
+void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
                                         MachineBasicBlock::iterator Begin,
                                         MachineBasicBlock::iterator End,
                                         unsigned NumRegionInstrs) {
@@ -293,7 +293,7 @@ void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
   }
 }
 
-void GCNIterativeScheduler::schedule() { // overriden
+void GCNIterativeScheduler::schedule() { // overridden
   // do nothing
   LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
     if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
@@ -304,7 +304,7 @@ void GCNIterativeScheduler::schedule() { // overriden
     << '\n';);
 }
 
-void GCNIterativeScheduler::finalizeSchedule() { // overriden
+void GCNIterativeScheduler::finalizeSchedule() { // overridden
   if (Regions.empty())
     return;
   switch (Strategy) {
@@ -391,8 +391,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
   // and already interleaved with debug values
   if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
     placeDebugValues();
-    // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
-    //assert(R.End == RegionEnd);
+    // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
+    // assert(R.End == RegionEnd);
     RegionEnd = R.End;
   }
 
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file defines and imlements the class GCNMinRegScheduler, which
+/// This file defines and implements the class GCNMinRegScheduler, which
 /// implements an experimental, simple scheduler whose main goal is to learn
 /// ways about consuming less possible registers for a region.
 ///
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// This pass combines split register tuple initialization into a single psuedo:
+/// This pass combines split register tuple initialization into a single pseudo:
 ///
 ///   undef %0.sub1:sreg_64 = S_MOV_B32 1
 ///   %0.sub0:sreg_64 = S_MOV_B32 2
@@ -121,7 +121,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
 
   // Register pressure is considered 'CRITICAL' if it is approaching a value
   // that would reduce the wave occupancy for the execution unit. When
-  // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
+  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
   // has the same cost, so we don't need to prefer one over the other.
 
   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
@@ -60,7 +60,7 @@ class AMDGPUCustomBehaviour : public CustomBehaviour {
   /// where we setup the InstrWaitCntInfo vector.
   /// The core logic for determining which CNTs an instruction
   /// interacts with is taken from SIInsertWaitcnts::updateEventWaitcntAfter().
-  /// Unfortunately, some of the logic from that function is not avalable to us
+  /// Unfortunately, some of the logic from that function is not available to us
   /// in this scope so we conservatively end up assuming that some
   /// instructions interact with more CNTs than they do in reality.
   void generateWaitCntInfo();
@@ -1108,7 +1108,7 @@ def ImageDimIntrinsicTable : GenericTable {
   let PrimaryKeyEarlyOut = 1;
 }
 
-def getImageDimInstrinsicByBaseOpcode : SearchIndex {
+def getImageDimIntrinsicByBaseOpcode : SearchIndex {
   let Table = ImageDimIntrinsicTable;
   let Key = ["BaseOpcode", "Dim"];
 }
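The `SearchIndex` name above doubles as the name of the C++ lookup function that TableGen's searchable-tables backend emits, which is why this one misspelling had to be fixed in three places at once: this .td definition, the header declaration, and the legalizer call site shown earlier. As a rough, self-contained sketch (not part of this commit) of the kind of lookup the generated index performs — the table rows here are placeholders, not real generated data:

```cpp
#include <algorithm>
#include <cstdint>

struct ImageDimIntrinsicInfo {
  unsigned Intr;       // intrinsic ID
  unsigned BaseOpcode; // primary key, first component
  unsigned Dim;        // primary key, second component
};

// Placeholder rows, sorted by (BaseOpcode, Dim) like the generated table.
static const ImageDimIntrinsicInfo Table[] = {{100, 1, 0}, {101, 1, 1}, {102, 2, 0}};

static uint64_t key(unsigned BaseOpcode, unsigned Dim) {
  return (uint64_t(BaseOpcode) << 32) | Dim;
}

const ImageDimIntrinsicInfo *
getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim) {
  const uint64_t K = key(BaseOpcode, Dim);
  // Binary search on the composite {BaseOpcode, Dim} key.
  const auto *I = std::lower_bound(std::begin(Table), std::end(Table), K,
                                   [](const ImageDimIntrinsicInfo &E, uint64_t V) {
                                     return key(E.BaseOpcode, E.Dim) < V;
                                   });
  if (I == std::end(Table) || key(I->BaseOpcode, I->Dim) != K)
    return nullptr; // no entry for this base opcode / dimension pair
  return I;
}
```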
@@ -926,7 +926,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
     std::swap(LHS, RHS);
     CC = DAG.getCondCode(CCSwapped);
   } else {
-    // Try inverting the conditon and then swapping the operands
+    // Try inverting the condition and then swapping the operands
     ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
     CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
     if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
@@ -175,7 +175,7 @@ public:
                         int *BytesAdded = nullptr) const override;
 
   unsigned removeBranch(MachineBasicBlock &MBB,
-                        int *BytesRemvoed = nullptr) const override;
+                        int *BytesRemoved = nullptr) const override;
 
   bool isPredicated(const MachineInstr &MI) const override;
 
@@ -1346,7 +1346,7 @@ let Predicates = [isR600] in {
 
 
 //===----------------------------------------------------------------------===//
-// Regist loads and stores - for indirect addressing
+// Register loads and stores - for indirect addressing
 //===----------------------------------------------------------------------===//
 
 let Namespace = "R600" in {
@@ -29,7 +29,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
   MRI = &DAG->MRI;
   CurInstKind = IDOther;
   CurEmitted = 0;
-  OccupedSlotsMask = 31;
+  OccupiedSlotsMask = 31;
   InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
   InstKindLimit[IDOther] = 32;
   InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
@@ -138,7 +138,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (NextInstKind != CurInstKind) {
     LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
     if (NextInstKind != IDAlu)
-      OccupedSlotsMask |= 31;
+      OccupiedSlotsMask |= 31;
     CurEmitted = 0;
     CurInstKind = NextInstKind;
   }
@@ -339,10 +339,10 @@ void R600SchedStrategy::LoadAlu() {
 
 void R600SchedStrategy::PrepareNextSlot() {
   LLVM_DEBUG(dbgs() << "New Slot\n");
-  assert (OccupedSlotsMask && "Slot wasn't filled");
-  OccupedSlotsMask = 0;
+  assert(OccupiedSlotsMask && "Slot wasn't filled");
+  OccupiedSlotsMask = 0;
 //  if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-//    OccupedSlotsMask |= 16;
+//    OccupiedSlotsMask |= 16;
   InstructionsGroupCandidate.clear();
   LoadAlu();
 }
@@ -400,41 +400,41 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
 
 SUnit* R600SchedStrategy::pickAlu() {
   while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
-    if (!OccupedSlotsMask) {
+    if (!OccupiedSlotsMask) {
       // Bottom up scheduling : predX must comes first
       if (!AvailableAlus[AluPredX].empty()) {
-        OccupedSlotsMask |= 31;
+        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
       }
       // Flush physical reg copies (RA will discard them)
       if (!AvailableAlus[AluDiscarded].empty()) {
-        OccupedSlotsMask |= 31;
+        OccupiedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
       }
       // If there is a T_XYZW alu available, use it
       if (!AvailableAlus[AluT_XYZW].empty()) {
-        OccupedSlotsMask |= 15;
+        OccupiedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
       }
     }
-    bool TransSlotOccuped = OccupedSlotsMask & 16;
-    if (!TransSlotOccuped && VLIW5) {
+    bool TransSlotOccupied = OccupiedSlotsMask & 16;
+    if (!TransSlotOccupied && VLIW5) {
       if (!AvailableAlus[AluTrans].empty()) {
-        OccupedSlotsMask |= 16;
+        OccupiedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
       }
       SUnit *SU = AttemptFillSlot(3, true);
       if (SU) {
-        OccupedSlotsMask |= 16;
+        OccupiedSlotsMask |= 16;
        return SU;
       }
     }
     for (int Chan = 3; Chan > -1; --Chan) {
-      bool isOccupied = OccupedSlotsMask & (1 << Chan);
+      bool isOccupied = OccupiedSlotsMask & (1 << Chan);
       if (!isOccupied) {
         SUnit *SU = AttemptFillSlot(Chan, false);
         if (SU) {
-          OccupedSlotsMask |= (1 << Chan);
+          OccupiedSlotsMask |= (1 << Chan);
           InstructionsGroupCandidate.push_back(SU->getInstr());
           return SU;
         }
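As context for the rename, inferred from the uses above rather than stated in the commit: `OccupiedSlotsMask` tracks the five issue slots of an R600 VLIW ALU group, bits 0-3 for the X/Y/Z/W vector slots and bit 4 for the trans slot, so `|= 15` claims the four vector slots, `|= 16` the trans slot, and `|= 31` all five. A sketch with hypothetical named constants (the scheduler itself uses the raw literals):

```cpp
// Hypothetical names for the slot-mask bits used by R600SchedStrategy.
enum : int {
  SlotX = 1 << 0,
  SlotY = 1 << 1,
  SlotZ = 1 << 2,
  SlotW = 1 << 3,
  SlotTrans = 1 << 4,                        // fifth (transcendental) slot
  SlotsXYZW = SlotX | SlotY | SlotZ | SlotW, // 15: the four vector slots
  SlotsAll = SlotsXYZW | SlotTrans           // 31: the whole bundle
};
```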
@@ -63,7 +63,7 @@ class R600SchedStrategy final : public MachineSchedStrategy {
 
   int InstKindLimit[IDLast];
 
-  int OccupedSlotsMask;
+  int OccupiedSlotsMask;
 
 public:
   R600SchedStrategy() = default;
@@ -228,7 +228,7 @@ static bool updateOperand(FoldCandidate &Fold,
       MachineOperand &Mod = MI->getOperand(ModIdx);
       unsigned Val = Mod.getImm();
       if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
-        // Only apply the following transformation if that operand requries
+        // Only apply the following transformation if that operand requires
        // a packed immediate.
        switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
        case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -688,7 +688,7 @@ void SIFoldOperands::foldOperand(
 
     // Don't fold into a copy to a physical register with the same class. Doing
     // so would interfere with the register coalescer's logic which would avoid
-    // redundant initalizations.
+    // redundant initializations.
     if (DestReg.isPhysical() && SrcRC->contains(DestReg))
       return;
 
@@ -902,7 +902,7 @@ void SIFoldOperands::foldOperand(
       tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
 
       // FIXME: We could try to change the instruction from 64-bit to 32-bit
-      // to enable more folding opportunites. The shrink operands pass
+      // to enable more folding opportunities. The shrink operands pass
       // already does this.
       return;
     }
@@ -73,7 +73,7 @@ public:
 
 // Class of object that encapsulates latest instruction counter score
 // associated with the operand. Used for determining whether
-// s_waitcnt instruction needs to be emited.
+// s_waitcnt instruction needs to be emitted.
 
 #define CNT_MASK(t) (1u << (t))
 
@@ -146,7 +146,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
       if (!AddrOp->isReg())
         return false;
 
-      // TODO: We should be able to merge physical reg addreses.
+      // TODO: We should be able to merge physical reg addresses.
       if (AddrOp->getReg().isPhysical())
         return false;
 
@@ -652,7 +652,7 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
 }
 
 // This function assumes that \p A and \p B have are identical except for
-// size and offset, and they referecne adjacent memory.
+// size and offset, and they reference adjacent memory.
 static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
                                                    const MachineMemOperand *A,
                                                    const MachineMemOperand *B) {
@@ -13,7 +13,7 @@
 /// All control flow is handled using predicated instructions and
 /// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
 /// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-/// by writting to the 64-bit EXEC register (each bit corresponds to a
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
 /// single vector ALU). Typically, for predicates, a vector ALU will write
 /// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
 /// Vector ALU) and then the ScalarALU will AND the VCC register with the
@@ -38,7 +38,8 @@
 /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
 ///
 /// label0:
-/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then block
+/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then
+///                                   // block
 /// %exec = S_XOR_B64 %sgpr0, %exec   // Update the exec mask
 /// S_BRANCH_EXECZ label1             // Use our branch optimization
 ///                                   // instruction again.
@@ -368,7 +368,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // All those frame indices which are dead by now should be removed from the
-  // function frame. Othewise, there is a side effect such as re-mapping of
+  // function frame. Otherwise, there is a side effect such as re-mapping of
   // free frame index ids by the later pass(es) like "stack slot coloring"
   // which in turn could mess-up with the book keeping of "frame index to VGPR
   // lane".
@@ -403,7 +403,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
   }
 
   // TODO: compute InternalAdditionnalPressure.
-  InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+  InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
 
   // Check everything is right.
 #ifndef NDEBUG
@@ -72,7 +72,7 @@ class SIScheduleBlock {
   // store the live virtual and real registers.
   // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
   // Pressure of additional registers required inside the block.
-  std::vector<unsigned> InternalAdditionnalPressure;
+  std::vector<unsigned> InternalAdditionalPressure;
   // Pressure of input and output registers
   std::vector<unsigned> LiveInPressure;
   std::vector<unsigned> LiveOutPressure;
@@ -153,8 +153,8 @@ public:
 
   // Needs the block to be scheduled inside
   // TODO: find a way to compute it.
-  std::vector<unsigned> &getInternalAdditionnalRegUsage() {
-    return InternalAdditionnalPressure;
+  std::vector<unsigned> &getInternalAdditionalRegUsage() {
+    return InternalAdditionalPressure;
   }
 
   std::set<unsigned> &getInRegs() { return LiveInRegs; }
@@ -225,7 +225,7 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
   // RequirePending is used to indicate whether we are collecting the initial
   // requirements for the block, and need to defer the first InsertionPoint to
   // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
-  // we discover an explict setreg that means this block doesn't have any
+  // we discover an explicit setreg that means this block doesn't have any
   // initial requirements.
   bool RequirePending = true;
   Status IPChange;
@@ -11,7 +11,7 @@
 /// structures and waterfall loops.
 ///
 /// When we do structurization, we usually transform an if-else into two
-/// sucessive if-then (with a flow block to do predicate inversion). Consider a
+/// successive if-then (with a flow block to do predicate inversion). Consider a
 /// simple case after structurization: A divergent value %a was defined before
 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
 ///   bb.if:
@@ -365,7 +365,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
 
     if (Dst &&
         DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
-      // This will work if the tied src is acessing WORD_0, and the dst is
+      // This will work if the tied src is accessing WORD_0, and the dst is
       // writing WORD_1. Modifiers don't matter because all the bits that
       // would be impacted are being overwritten by the dst.
       // Any other case will not work.
@@ -8,7 +8,7 @@
 //
 /// \file
 /// This pass creates bundles of memory instructions to protect adjacent loads
-/// and stores from beeing rescheduled apart from each other post-RA.
+/// and stores from being rescheduled apart from each other post-RA.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -174,7 +174,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
     MI.setDesc(TII->get(AMDGPU::S_BRANCH));
   } else if (IsVCCZ && MaskValue == 0) {
     // Will always branch
-    // Remove all succesors shadowed by new unconditional branch
+    // Remove all successors shadowed by new unconditional branch
     MachineBasicBlock *Parent = MI.getParent();
     SmallVector<MachineInstr *, 4> ToRemove;
     bool Found = false;
@@ -834,7 +834,7 @@ defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)
 
 // This is not a real register. This is just to have a register to add
 // to VReg_1 that does not alias any real register that would
-// introduce inferred register classess.
+// introduce inferred register classes.
 def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
   let isArtificial = 1;
 }
@@ -188,7 +188,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
     return;
 
   // eq/ne is special because the imm16 can be treated as signed or unsigned,
-  // and initially selectd to the unsigned versions.
+  // and initially selected to the unsigned versions.
   if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
     bool HasUImm;
     if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
@@ -771,7 +771,7 @@ bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
 /// Is this floating-point operand?
 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
 
-/// Does this opearnd support only inlinable literals?
+/// Does this operand support only inlinable literals?
 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
 
 /// Get the size in bits of a register from the register class \p RC.
@@ -48,7 +48,7 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
 /// as an use within some instruction (either from kernel or from non-kernel).
 bool hasUserInstruction(const GlobalValue *GV);
 
-/// \returns true if an LDS global requres lowering to a module LDS structure
+/// \returns true if an LDS global requires lowering to a module LDS structure
 /// if \p F is not given. If \p F is given it must be a kernel and function
 /// \returns true if an LDS global is directly used from that kernel and it
 /// is safe to replace its uses with a kernel LDS structure member.