AMDGPU: Fold readlane/readfirstlane calls

llvm-svn: 363587
This commit is contained in:
Matt Arsenault 2019-06-17 17:52:35 +00:00
parent ad04e7ad42
commit 6d741f29ec
2 changed files with 149 additions and 0 deletions

View File

@ -3781,6 +3781,30 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// A constant value is trivially uniform.
if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
return replaceInstUsesWith(*II, C);
// The remaining folds replace this call with its own source operand. They
// may not be safe if the exec may not be the same between the def and use,
// so they are only attempted when the source is not an instruction from a
// different basic block.
Value *Src = II->getArgOperand(0);
Instruction *SrcInst = dyn_cast<Instruction>(Src);
// Non-instruction sources (SrcInst == nullptr) fall through to the matches
// below; an instruction source whose parent block differs from II's parent
// block ends the folding here.
if (SrcInst && SrcInst->getParent() != II->getParent())
break;
// readfirstlane (readfirstlane x) -> readfirstlane x
// readlane (readfirstlane x), y -> readfirstlane x
// This check is shared by both intrinsics: the inner readfirstlane is
// matched without inspecting its operand or the outer lane index.
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
return replaceInstUsesWith(*II, Src);
if (IID == Intrinsic::amdgcn_readfirstlane) {
// readfirstlane (readlane x, y) -> readlane x, y
// The inner readlane is matched regardless of its operands.
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
return replaceInstUsesWith(*II, Src);
} else {
// NOTE(review): this branch presumably only sees amdgcn_readlane; the case
// label is outside this hunk -- confirm.
// readlane (readlane x, y), y -> readlane x, y
// Only folded when the inner call's lane operand is the very same Value as
// this call's lane operand (m_Specific on operand 1).
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
m_Value(), m_Specific(II->getArgOperand(1)))))
return replaceInstUsesWith(*II, Src);
}
// No fold applied.
break;
}
case Intrinsic::stackrestore: {

View File

@ -2462,6 +2462,63 @@ define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
ret void
}
; A chain of three readfirstlane calls in a single block, each reading the
; previous result. The CHECK lines expect only the first call on %arg to
; remain and its result to be returned.
define i32 @readfirstlane_idempotent(i32 %arg) {
; CHECK-LABEL: @readfirstlane_idempotent(
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
; CHECK-NEXT: ret i32 [[READ0]]
;
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
%read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
ret i32 %read2
}
; readfirstlane applied to the result of a readlane in the same block. The
; outer readfirstlane is expected to fold away, leaving only the readlane.
; This is the same-block counterpart of
; @readfirstlane_readlane_different_block; the reverse nesting (readlane of a
; readfirstlane) is covered by @readlane_readfirstlane.
define i32 @readfirstlane_readlane(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readlane(
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
; CHECK-NEXT: ret i32 [[READ0]]
;
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
ret i32 %read1
}
; Negative test: the inner readfirstlane is defined in bb0 and the outer one
; uses it from bb1. The CHECK lines expect both calls to remain, i.e. no fold
; across basic blocks.
define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
; CHECK-NEXT: ret i32 [[READ1]]
;
bb0:
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
br label %bb1
bb1:
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
ret i32 %read1
}
; Negative test: a readlane (lane 0) is defined in bb0 and a readfirstlane
; reads its result in bb1. The CHECK lines expect both calls to remain, i.e.
; no fold across basic blocks.
define i32 @readfirstlane_readlane_different_block(i32 %arg) {
; CHECK-LABEL: @readfirstlane_readlane_different_block(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
; CHECK-NEXT: ret i32 [[READ1]]
;
bb0:
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
br label %bb1
bb1:
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
ret i32 %read1
}
; --------------------------------------------------------------------
; llvm.amdgcn.readlane
; --------------------------------------------------------------------
@ -2491,6 +2548,74 @@ define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
ret void
}
; Two readlane calls in one block that use the same %lane operand, the second
; reading the first's result. The CHECK lines expect only the first call to
; remain and its result to be returned.
define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
; CHECK-LABEL: @readlane_idempotent(
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
; CHECK-NEXT: ret i32 [[READ0]]
;
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
ret i32 %read1
}
; Negative test: nested readlane calls in one block whose lane operands are
; different values (%lane0 vs %lane1). The CHECK lines expect both calls to
; remain.
define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
; CHECK-LABEL: @readlane_idempotent_different_lanes(
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
; CHECK-NEXT: ret i32 [[READ1]]
;
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
ret i32 %read1
}
; readlane (lane 0) applied to the result of a readfirstlane in the same
; block. The CHECK lines expect the readlane to fold away, leaving only the
; readfirstlane.
define i32 @readlane_readfirstlane(i32 %arg) {
; CHECK-LABEL: @readlane_readfirstlane(
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
; CHECK-NEXT: ret i32 [[READ0]]
;
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
ret i32 %read1
}
; Negative test: nested readlane calls with the same %lane operand, but the
; inner call lives in bb0 and the outer one in bb1. The CHECK lines expect
; both calls to remain, i.e. no fold across basic blocks.
define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
; CHECK-LABEL: @readlane_idempotent_different_block(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
; CHECK-NEXT: ret i32 [[READ1]]
;
bb0:
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
br label %bb1
bb1:
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
ret i32 %read1
}
; Negative test: a readfirstlane is defined in bb0 and a readlane (lane 0)
; reads its result in bb1. The CHECK lines expect both calls to remain, i.e.
; no fold across basic blocks.
define i32 @readlane_readfirstlane_different_block(i32 %arg) {
; CHECK-LABEL: @readlane_readfirstlane_different_block(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
; CHECK-NEXT: ret i32 [[READ1]]
;
bb0:
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
br label %bb1
bb1:
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
ret i32 %read1
}
; --------------------------------------------------------------------
; llvm.amdgcn.update.dpp.i32
; --------------------------------------------------------------------