AMDGPU: Don't add unnecessary convergent attributes

These are redundant with the intrinsic declaration.
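
As a minimal illustration (not part of this change; a sketch against the
C++ API this pass already uses, with placeholder module/function names):
IRBuilder::CreateIntrinsic materializes the intrinsic's declaration, which
already carries the convergent attribute from its .td definition, and
CallInst::isConvergent() falls back to the called function's attributes,
so a per-call-site attribute adds nothing.

  // Standalone sketch; module setup is illustrative boilerplate.
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    Module M("demo", Ctx);

    // The declaration of llvm.amdgcn.readfirstlane is itself marked
    // convergent (IntrConvergent in the intrinsic's .td definition).
    Function *Decl =
        Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_readfirstlane);
    outs() << "decl convergent: "
           << (Decl->hasFnAttribute(Attribute::Convergent) ? "yes" : "no")
           << "\n";

    // Build a call the same way the pass does, with no setConvergent().
    Function *F =
        Function::Create(FunctionType::get(Type::getInt32Ty(Ctx), false),
                         Function::ExternalLinkage, "f", &M);
    IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
    CallInst *CI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {},
                                     {B.getInt32(0)});
    B.CreateRet(CI);

    // True even though the call instruction carries no attribute of its
    // own: isConvergent() consults the callee's attributes.
    outs() << "call convergent: " << (CI->isConvergent() ? "yes" : "no")
           << "\n";
    return 0;
  }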

llvm-svn: 356143
Matt Arsenault 2019-03-14 13:46:09 +00:00
parent 7b907bed3c
commit 3a31b3f6e8
1 changed file with 2 additions and 19 deletions

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

@@ -58,8 +58,6 @@ private:
   void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
                       unsigned ValIdx, bool ValDivergent) const;
 
-  void setConvergent(CallInst *const CI) const;
-
 public:
   static char ID;
@@ -253,7 +251,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   CallInst *const Ballot =
       B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()},
                         {B.getInt32(1), B.getInt32(0), B.getInt32(33)});
-  setConvergent(Ballot);
 
   // We need to know how many lanes are active within the wavefront that are
   // below us. If we counted each lane linearly starting from 0, a lane is
@@ -281,13 +278,11 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   // correctly contribute to the final result.
   CallInst *const SetInactive =
       B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
-  setConvergent(SetInactive);
 
   CallInst *const FirstDPP =
       B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
                         {Identity, SetInactive, B.getInt32(DPP_WF_SR1),
                          B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
-  setConvergent(FirstDPP);
   NewV = FirstDPP;
 
   const unsigned Iters = 7;
@@ -305,7 +300,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
           Intrinsic::amdgcn_update_dpp, Ty,
           {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
            B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
-      setConvergent(DPP);
 
       NewV = B.CreateBinOp(Op, NewV, DPP);
     }
@@ -322,10 +316,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
           B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
       CallInst *const ReadLaneLo = B.CreateIntrinsic(
           Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
-      setConvergent(ReadLaneLo);
       CallInst *const ReadLaneHi = B.CreateIntrinsic(
           Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
-      setConvergent(ReadLaneHi);
       Value *const PartialInsert = B.CreateInsertElement(
           UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
       Value *const Insert =
@@ -334,7 +326,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
     } else if (TyBitWidth == 32) {
       CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
                                                    {}, {NewV, B.getInt32(63)});
-      setConvergent(ReadLane);
       NewV = ReadLane;
     } else {
       llvm_unreachable("Unhandled atomic bit width");
@@ -398,20 +389,16 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
           B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
       CallInst *const ReadFirstLaneLo =
           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
-      setConvergent(ReadFirstLaneLo);
       CallInst *const ReadFirstLaneHi =
           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
-      setConvergent(ReadFirstLaneHi);
       Value *const PartialInsert = B.CreateInsertElement(
           UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
       Value *const Insert =
           B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
       BroadcastI = B.CreateBitCast(Insert, Ty);
     } else if (TyBitWidth == 32) {
-      CallInst *const ReadFirstLane =
-          B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
-      setConvergent(ReadFirstLane);
-      BroadcastI = ReadFirstLane;
+      BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
     } else {
       llvm_unreachable("Unhandled atomic bit width");
     }
@@ -439,10 +426,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   I.eraseFromParent();
 }
 
-void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
-  CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-}
-
 INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
                       "AMDGPU atomic optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)