forked from OSchip/llvm-project
[AMDGPU] Optimize old value of v_mov_b32_dpp
We can eliminate the old value if bound_ctrl = 1 and row_mask = bank_mask = 0xf. This is an alternative implementation working with the intrinsic in InstCombine. Original review for the post-ISel optimization: D46570. Differential Revision: https://reviews.llvm.org/D46596 llvm-svn: 332956
This commit is contained in:
parent
7c6cd52698
commit
0e132dca53
|
@ -3407,6 +3407,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
// amdgcn.kill(i1 1) is a no-op
|
||||
return eraseInstFromFunction(CI);
|
||||
}
|
||||
case Intrinsic::amdgcn_update_dpp: {
|
||||
Value *Old = II->getArgOperand(0);
|
||||
|
||||
auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
|
||||
auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
|
||||
auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
|
||||
if (!BC || !RM || !BM ||
|
||||
BC->isZeroValue() ||
|
||||
RM->getZExtValue() != 0xF ||
|
||||
BM->getZExtValue() != 0xF ||
|
||||
isa<UndefValue>(Old))
|
||||
break;
|
||||
|
||||
// If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
|
||||
II->setOperand(0, UndefValue::get(Old->getType()));
|
||||
return II;
|
||||
}
|
||||
case Intrinsic::stackrestore: {
|
||||
// If the save is right next to the restore, remove the restore. This can
|
||||
// happen when variable allocas are DCE'd.
|
||||
|
|
|
@ -1692,5 +1692,34 @@ define void @kill_true() {
|
|||
ret void
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; llvm.amdgcn.update.dpp.i32
|
||||
; --------------------------------------------------------------------
|
||||
|
||||
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
|
||||
|
||||
; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_no_combine(
|
||||
; CHECK: @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
|
||||
define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
|
||||
store i32 %tmp0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_drop_old(
|
||||
; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in2, i32 3, i32 15, i32 15, i1 true)
|
||||
define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
|
||||
store i32 %tmp0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_undef_old(
|
||||
; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 true)
|
||||
define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
|
||||
store i32 %tmp0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: attributes #5 = { convergent }
|
||||
|
|
Loading…
Reference in New Issue