forked from OSchip/llvm-project
StructurizeCFG: Simplify inserted PHI nodes
Summary: This improves subsequent divergence analysis in some cases.
Change-Id: I5e95e7ec7fd3fa80d414d1a53a02fea23e3d67d3
Reviewers: arsenm, rampitec
Subscribers: jvesely, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D53316
llvm-svn: 344697
This commit is contained in:
parent
c4a2ff0950
commit
0823050b9f
|
@ -13,6 +13,7 @@
|
|||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
|
@ -596,7 +597,8 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
|
|||
|
||||
/// Add the real PHI value as soon as everything is set up
|
||||
void StructurizeCFG::setPhiValues() {
|
||||
SSAUpdater Updater;
|
||||
SmallVector<PHINode *, 8> InsertedPhis;
|
||||
SSAUpdater Updater(&InsertedPhis);
|
||||
for (const auto &AddedPhi : AddedPhis) {
|
||||
BasicBlock *To = AddedPhi.first;
|
||||
const BBVector &From = AddedPhi.second;
|
||||
|
@ -632,6 +634,26 @@ void StructurizeCFG::setPhiValues() {
|
|||
DeletedPhis.erase(To);
|
||||
}
|
||||
assert(DeletedPhis.empty());
|
||||
|
||||
// Simplify any phis inserted by the SSAUpdater if possible
|
||||
bool Changed;
|
||||
do {
|
||||
Changed = false;
|
||||
|
||||
SimplifyQuery Q(Func->getParent()->getDataLayout());
|
||||
Q.DT = DT;
|
||||
for (size_t i = 0; i < InsertedPhis.size(); ++i) {
|
||||
PHINode *Phi = InsertedPhis[i];
|
||||
if (Value *V = SimplifyInstruction(Phi, Q)) {
|
||||
Phi->replaceAllUsesWith(V);
|
||||
Phi->eraseFromParent();
|
||||
InsertedPhis[i] = InsertedPhis.back();
|
||||
InsertedPhis.pop_back();
|
||||
i--;
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
} while (Changed);
|
||||
}
|
||||
|
||||
/// Remove phi values from all successors and then remove the terminator.
|
||||
|
|
|
@ -312,13 +312,12 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
|
|||
|
||||
; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
|
||||
; IR: Flow2:
|
||||
; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
|
||||
; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %20)
|
||||
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
|
||||
|
||||
; IR: UnifiedReturnBlock:
|
||||
; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %15)
|
||||
; IR: %UnifiedRetVal = phi float [ 2.000000e+00, %Flow2 ], [ 1.000000e+00, %exit0 ]
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
|
||||
; IR: ret float %UnifiedRetVal
|
||||
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
|
||||
entry:
|
||||
|
@ -353,8 +352,8 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
|
|||
; GCN: {{^}}[[FLOW]]:
|
||||
; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]]
|
||||
|
||||
; GCN: v_mov_b32_e32 v0, 2.0
|
||||
; GCN: s_or_b64 exec, exec
|
||||
; GCN: v_mov_b32_e32 v0, 2.0
|
||||
; GCN-NOT: s_and_b64 exec, exec
|
||||
; GCN: v_mov_b32_e32 v0, 1.0
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
; GCN-NEXT: s_mov_b64
|
||||
; GCN-NEXT: s_and_b64 [[MASKED_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]]
|
||||
; GCN-NEXT: s_or_b64 [[OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}}
|
||||
; TODO: get rid of redundant loop counter moves
|
||||
; GCN-NEXT: v_mov_b32_e32
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]]
|
||||
; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]]
|
||||
|
||||
|
@ -43,6 +45,7 @@
|
|||
; GCN-NEXT: s_or_b64 exec, exec, [[OR_BREAK]]
|
||||
; GCN-NEXT: s_and_b64 [[MASKED2_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]]
|
||||
; GCN-NEXT: s_or_b64 [[OUTER_OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED2_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}}
|
||||
; GCN-NEXT: v_mov_b32_e32
|
||||
; GCN-NEXT: s_andn2_b64 exec, exec, [[OUTER_OR_BREAK]]
|
||||
; GCN-NEXT: s_cbranch_execnz [[OUTER_LOOP]]
|
||||
define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
|
||||
|
|
|
@ -592,11 +592,12 @@ exit:
|
|||
|
||||
; GCN-LABEL: {{^}}smrd_uniform_loop2:
|
||||
; (this test differs from smrd_uniform_loop by the more complex structure of phis,
|
||||
; which currently confuses the DivergenceAnalysis after structurization)
|
||||
; which used to confuse the DivergenceAnalysis after structurization)
|
||||
;
|
||||
; TODO: this should use an s_buffer_load
|
||||
; TODO: we should keep the loop counter in an SGPR
|
||||
;
|
||||
; GCN: buffer_load_dword
|
||||
; GCN: v_readfirstlane_b32
|
||||
; GCN: s_buffer_load_dword
|
||||
define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) #0 {
|
||||
main_body:
|
||||
br label %loop
|
||||
|
|
|
@ -12,13 +12,12 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
|
|||
; CHECK: bb2:
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: bb3:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP1:%.*]], [[FLOW]] ], [ [[TMP7:%.*]], [[BB6:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ undef, [[FLOW]] ], [ [[TMP7:%.*]], [[BB6:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0)
|
||||
; CHECK-NEXT: br label [[BB8:%.*]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP1]] = phi i1 [ undef, [[BB2]] ], [ undef, [[BB:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[BB6]], label [[BB3:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]]
|
||||
; CHECK: bb6:
|
||||
; CHECK-NEXT: [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]]
|
||||
; CHECK-NEXT: br label [[BB3]]
|
||||
|
|
|
@ -1,28 +1,23 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; TODO: eliminate redundant phis for the loop counter
|
||||
;
|
||||
define void @test1() {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[CTR_NEXT:%.*]], [[LOOP_B:%.*]] ], [ [[CTR_NEXT]], [[LOOP_A:%.*]] ]
|
||||
; CHECK-NEXT: br label [[FLOW1:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FLOW1]] ]
|
||||
; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CTR_NEXT:%.*]], [[FLOW1]] ]
|
||||
; CHECK-NEXT: [[CTR_NEXT]] = add i32 [[CTR]], 1
|
||||
; CHECK-NEXT: br i1 undef, label [[LOOP_A]], label [[FLOW1]]
|
||||
; CHECK-NEXT: br i1 undef, label [[LOOP_A:%.*]], label [[FLOW1]]
|
||||
; CHECK: loop.a:
|
||||
; CHECK-NEXT: br i1 undef, label [[LOOP_B]], label [[FLOW:%.*]]
|
||||
; CHECK-NEXT: br i1 undef, label [[LOOP_B:%.*]], label [[FLOW:%.*]]
|
||||
; CHECK: loop.b:
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP0]], [[FLOW]] ], [ undef, [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOP]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
|
|
@ -8,33 +8,36 @@ bb:
|
|||
br label %bb3
|
||||
|
||||
; CHECK: bb3:
|
||||
; CHECK: %0 = xor i1 %tmp4, true
|
||||
; CHECK: br i1 %0, label %bb5, label %Flow
|
||||
bb3: ; preds = %bb7, %bb
|
||||
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
|
||||
%tmp4 = fcmp ult float %arg1, 3.500000e+00
|
||||
; CHECK: %0 = xor i1 %tmp4, true
|
||||
; CHECK: br i1 %0, label %bb5, label %Flow
|
||||
br i1 %tmp4, label %bb7, label %bb5
|
||||
|
||||
; CHECK: bb5:
|
||||
; CHECK: %1 = xor i1 %tmp6, true
|
||||
; CHECK: br label %Flow
|
||||
bb5: ; preds = %bb3
|
||||
%tmp6 = fcmp olt float 0.000000e+00, %arg2
|
||||
; CHECK: br label %Flow
|
||||
br i1 %tmp6, label %bb10, label %bb7
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK: br i1 %3, label %bb7, label %Flow1
|
||||
; CHECK: %2 = phi i1 [ %1, %bb5 ], [ %tmp4, %bb3 ]
|
||||
; CHECK: br i1 %2, label %bb7, label %Flow1
|
||||
|
||||
; CHECK: bb7
|
||||
; CHECK: bb7:
|
||||
; CHECK: br label %Flow1
|
||||
bb7: ; preds = %bb5, %bb3
|
||||
%tmp8 = add nuw nsw i64 %tmp, 1
|
||||
%tmp9 = icmp slt i64 %tmp8, 5
|
||||
; CHECK: br label %Flow1
|
||||
br i1 %tmp9, label %bb3, label %bb10
|
||||
|
||||
; CHECK: Flow1:
|
||||
; CHECK: br i1 %7, label %bb10, label %bb3
|
||||
; CHECK: %6 = phi i1 [ %3, %bb7 ], [ true, %Flow ]
|
||||
; CHECK: br i1 %6, label %bb10, label %bb3
|
||||
|
||||
; CHECK: bb10
|
||||
; CHECK: bb10:
|
||||
bb10: ; preds = %bb7, %bb5
|
||||
%tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
|
||||
store i32 %tmp11, i32 addrspace(1)* %arg, align 4
|
||||
|
|
Loading…
Reference in New Issue