forked from OSchip/llvm-project
AMDGPU: Fold readlane/readfirstlane calls
llvm-svn: 363587
This commit is contained in:
parent
ad04e7ad42
commit
6d741f29ec
|
@ -3781,6 +3781,30 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
|||
// A constant value is trivially uniform.
|
||||
if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
|
||||
return replaceInstUsesWith(*II, C);
|
||||
|
||||
// The rest of these may not be safe if the exec may not be the same between
|
||||
// the def and use.
|
||||
Value *Src = II->getArgOperand(0);
|
||||
Instruction *SrcInst = dyn_cast<Instruction>(Src);
|
||||
if (SrcInst && SrcInst->getParent() != II->getParent())
|
||||
break;
|
||||
|
||||
// readfirstlane (readfirstlane x) -> readfirstlane x
|
||||
// readlane (readfirstlane x), y -> readfirstlane x
|
||||
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
|
||||
return replaceInstUsesWith(*II, Src);
|
||||
|
||||
if (IID == Intrinsic::amdgcn_readfirstlane) {
|
||||
// readfirstlane (readlane x, y) -> readlane x, y
|
||||
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
|
||||
return replaceInstUsesWith(*II, Src);
|
||||
} else {
|
||||
// readlane (readlane x, y), y -> readlane x, y
|
||||
if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
|
||||
m_Value(), m_Specific(II->getArgOperand(1)))))
|
||||
return replaceInstUsesWith(*II, Src);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case Intrinsic::stackrestore: {
|
||||
|
|
|
@ -2462,6 +2462,63 @@ define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Three chained readfirstlane calls in a single block; the CHECK lines expect
; only the first call to remain and its result to be returned.
define i32 @readfirstlane_idempotent(i32 %arg) {
|
||||
; CHECK-LABEL: @readfirstlane_idempotent(
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
|
||||
; CHECK-NEXT: ret i32 [[READ0]]
|
||||
;
|
||||
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
|
||||
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
|
||||
%read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
|
||||
ret i32 %read2
|
||||
}
|
||||
|
||||
; readfirstlane of a same-block readlane: exercises the
; "readfirstlane (readlane x, y) -> readlane x, y" fold, so the CHECK lines
; expect only the readlane call to remain. (Previously this body duplicated
; @readlane_readfirstlane and left that fold without a same-block test.)
define i32 @readfirstlane_readlane(i32 %arg) {
|
||||
; CHECK-LABEL: @readfirstlane_readlane(
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
|
||||
; CHECK-NEXT: ret i32 [[READ0]]
|
||||
;
|
||||
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
|
||||
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; The inner readfirstlane is defined in bb0 and consumed by a second
; readfirstlane in bb1; the CHECK lines expect both calls to be kept.
define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
|
||||
; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
|
||||
; CHECK-NEXT: bb0:
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
|
||||
; CHECK-NEXT: ret i32 [[READ1]]
|
||||
;
|
||||
bb0:
|
||||
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; readfirstlane in bb1 applied to a readlane (lane 0) defined in bb0; the
; CHECK lines expect both calls to be kept.
define i32 @readfirstlane_readlane_different_block(i32 %arg) {
|
||||
; CHECK-LABEL: @readfirstlane_readlane_different_block(
|
||||
; CHECK-NEXT: bb0:
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
|
||||
; CHECK-NEXT: ret i32 [[READ1]]
|
||||
;
|
||||
bb0:
|
||||
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; llvm.amdgcn.readlane
|
||||
; --------------------------------------------------------------------
|
||||
|
@ -2491,6 +2548,74 @@ define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; readlane of a readlane in one block, both using the same %lane operand; the
; CHECK lines expect only the first call to remain.
define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
|
||||
; CHECK-LABEL: @readlane_idempotent(
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
|
||||
; CHECK-NEXT: ret i32 [[READ0]]
|
||||
;
|
||||
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
|
||||
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; Nested readlane calls whose lane operands differ (%lane0 vs %lane1); the
; CHECK lines expect both calls to be kept.
define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
|
||||
; CHECK-LABEL: @readlane_idempotent_different_lanes(
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
|
||||
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
|
||||
; CHECK-NEXT: ret i32 [[READ1]]
|
||||
;
|
||||
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
|
||||
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; readlane (lane 0) of a same-block readfirstlane; the CHECK lines expect only
; the readfirstlane call to remain.
define i32 @readlane_readfirstlane(i32 %arg) {
|
||||
; CHECK-LABEL: @readlane_readfirstlane(
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
|
||||
; CHECK-NEXT: ret i32 [[READ0]]
|
||||
;
|
||||
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
|
||||
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; Same-lane nested readlane with the inner call in bb0 and the outer call in
; bb1; the CHECK lines expect both calls to be kept.
define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
|
||||
; CHECK-LABEL: @readlane_idempotent_different_block(
|
||||
; CHECK-NEXT: bb0:
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
|
||||
; CHECK-NEXT: ret i32 [[READ1]]
|
||||
;
|
||||
bb0:
|
||||
%read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
|
||||
; readlane (lane 0) in bb1 applied to a readfirstlane defined in bb0; the
; CHECK lines expect both calls to be kept.
define i32 @readlane_readfirstlane_different_block(i32 %arg) {
|
||||
; CHECK-LABEL: @readlane_readfirstlane_different_block(
|
||||
; CHECK-NEXT: bb0:
|
||||
; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
|
||||
; CHECK-NEXT: br label [[BB1:%.*]]
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
|
||||
; CHECK-NEXT: ret i32 [[READ1]]
|
||||
;
|
||||
bb0:
|
||||
%read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
|
||||
ret i32 %read1
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; llvm.amdgcn.update.dpp.i32
|
||||
; --------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue