forked from OSchip/llvm-project
AMDGPU: Add a pass to rewrite certain undef in PHI
For the pattern of IR (%if terminates with a divergent branch.), divergence analysis will report %phi as uniform to help optimal code generation. ``` %if | \ | %then | / %endif: %phi = phi [ %uniform, %if ], [ %undef, %then ] ``` In the backend, %phi and %uniform will be assigned a scalar register. But the %undef from %then will make the scalar register dead in %then. This will likely cause the register to be over-written in %then. To fix the issue, we rewrite %undef as %uniform. For details, please refer to the comment in AMDGPURewriteUndefForPHI.cpp. Currently there are no test changes shown, but this is mandatory for later changes. Reviewed by: sameerds Differential Revision: https://reviews.llvm.org/D133840
This commit is contained in:
parent
7a8b9307ca
commit
cf14c7caac
|
@ -290,6 +290,10 @@ extern char &AMDGPUCodeGenPrepareID;
|
|||
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
|
||||
extern char &AMDGPULateCodeGenPrepareID;
|
||||
|
||||
FunctionPass *createAMDGPURewriteUndefForPHIPass();
|
||||
void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &);
|
||||
extern char &AMDGPURewriteUndefForPHIPassID;
|
||||
|
||||
void initializeSIAnnotateControlFlowPass(PassRegistry&);
|
||||
extern char &SIAnnotateControlFlowPassID;
|
||||
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file implements the idea to rewrite undef incoming operand for certain
|
||||
// PHIs in structurized CFG. This pass only works on IR that has gone through
|
||||
// StructurizedCFG pass, and this pass has some additional limitations that
// mean it can only run after SIAnnotateControlFlow.
|
||||
//
|
||||
// To achieve optimal code generation for AMDGPU, we assume that divergence
|
||||
// analysis reports the PHI in join block of divergent branch as uniform if
|
||||
// it has one unique uniform value plus additional undefined/poisoned incoming
|
||||
// value. That is to say, the later compiler pipeline will ensure such a PHI
// always returns a uniform value and works correctly. Let's take a look at two
// typical patterns in structured CFG that need to be taken care of: (In both
// patterns, block %if terminates with a divergent branch.)
|
||||
//
|
||||
// Pattern A: Block with undefined incoming value dominates defined predecessor
|
||||
// %if
|
||||
// | \
|
||||
// | %then
|
||||
// | /
|
||||
// %endif: %phi = phi [%undef, %if], [%uniform, %then]
|
||||
//
|
||||
// Pattern B: Block with defined incoming value dominates undefined predecessor
|
||||
// %if
|
||||
// | \
|
||||
// | %then
|
||||
// | /
|
||||
// %endif: %phi = phi [%uniform, %if], [%undef, %then]
|
||||
//
|
||||
// For pattern A, by reporting %phi as uniform, the later pipeline needs to
// make sure it is handled correctly. The backend usually allocates a scalar
// register and if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
|
||||
//
|
||||
// For pattern B, we will replace the undef operand with the other defined value
|
||||
// in this pass. So the scalar register allocated for such PHI will get correct
|
||||
// liveness. Without this transformation, the scalar register may be overwritten
|
||||
// in the %then block.
|
||||
//
|
||||
// Limitation note:
|
||||
// If the join block of divergent threads is a loop header, the pass cannot
|
||||
// handle it correctly right now. For below case, the undef in %phi should also
|
||||
// be rewritten. Currently we depend on SIAnnotateControlFlow to split %header
|
||||
// block to get a separate join block, then we can rewrite the undef correctly.
|
||||
// %if
|
||||
// | \
|
||||
// | %then
|
||||
// | /
|
||||
// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
|
||||
// | |
|
||||
// \---
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPURewriteUndefForPHI : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
AMDGPURewriteUndefForPHI() : FunctionPass(ID) {
|
||||
initializeAMDGPURewriteUndefForPHIPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
bool runOnFunction(Function &F);
|
||||
StringRef getPassName() const override {
|
||||
return "AMDGPU Rewrite Undef for PHI";
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<LegacyDivergenceAnalysis>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<LegacyDivergenceAnalysis>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
char AMDGPURewriteUndefForPHI::ID = 0;

// Register the pass and its analysis dependencies with the legacy pass
// manager so it can be requested by name (DEBUG_TYPE) from opt/llc.
INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                      "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
                    "Rewrite undef for PHI", false, false)
|
||||
|
||||
// Scan every PHI in \p F and, when a uniform PHI merges exactly one distinct
// defined incoming value with one or more undef/poison incoming values from
// divergent control flow, replace the PHI's uses with that defined value and
// erase the PHI. Returns true if any change was made.
//
// A PHI qualifies when:
//  * divergence analysis reports it as uniform,
//  * it has a single unique defined (non-undef) incoming value,
//  * the block providing that value terminates with a divergent branch, and
//  * that block dominates the PHI's block and every predecessor contributing
//    an undef value, so the defined value is available at all of them.
//
// Marked `static`: the function is file-local (its only caller is
// runOnFunction below), so it should have internal linkage per the LLVM
// coding standards; this also silences -Wmissing-prototypes.
static bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA,
                        DominatorTree *DT) {
  bool Changed = false;
  SmallVector<PHINode *> ToBeDeleted;
  for (auto &BB : F) {
    for (auto &PHI : BB.phis()) {
      if (DA->isDivergent(&PHI))
        continue;

      // The unique incoming value except undef/poison for the PHI node.
      Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with defined incoming value that dominates all
      // other blocks with the same incoming value.
      BasicBlock *DominateBB = nullptr;
      // Predecessors with undefined incoming value (excluding loop backedge).
      SmallVector<BasicBlock *> Undefs;

      for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
        Value *Incoming = PHI.getIncomingValue(i);
        BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

        // A self-reference does not affect the unique defined value.
        if (Incoming == &PHI)
          continue;

        // Note: isa<UndefValue> also matches poison (PoisonValue derives
        // from UndefValue).
        if (isa<UndefValue>(Incoming)) {
          // Undef from loop backedge will not be replaced.
          if (!DT->dominates(&BB, IncomingBB))
            Undefs.push_back(IncomingBB);
          continue;
        }

        if (!UniqueDefinedIncoming) {
          UniqueDefinedIncoming = Incoming;
          DominateBB = IncomingBB;
        } else if (Incoming == UniqueDefinedIncoming) {
          // Update DominateBB if necessary.
          if (DT->dominates(IncomingBB, DominateBB))
            DominateBB = IncomingBB;
        } else {
          // More than one distinct defined incoming value: nothing to do.
          UniqueDefinedIncoming = nullptr;
          break;
        }
      }
      // We only need to replace the undef for the PHI which is merging
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace undef value if the unique
      // value is a Constant.
      if (!UniqueDefinedIncoming || Undefs.empty() ||
          !DA->isDivergent(DominateBB->getTerminator()))
        continue;

      // We only replace the undef when DominateBB truly dominates all the
      // other predecessors with undefined incoming value. Make sure DominateBB
      // dominates BB so that UniqueDefinedIncoming is available in BB and
      // afterwards.
      if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
            return DT->dominates(DominateBB, UD);
          })) {
        PHI.replaceAllUsesWith(UniqueDefinedIncoming);
        ToBeDeleted.push_back(&PHI);
        Changed = true;
      }
    }
  }

  // Erase after the walk; removing a PHI while iterating BB.phis() would
  // invalidate the iterator.
  for (auto *PHI : ToBeDeleted)
    PHI->eraseFromParent();

  return Changed;
}
|
||||
|
||||
// Legacy pass entry point: collect the required analyses and delegate the
// actual PHI rewriting to the file-local helper.
bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *DA = &getAnalysis<LegacyDivergenceAnalysis>();
  return rewritePHIs(F, DA, DT);
}
|
||||
|
||||
// Factory used by the AMDGPU target pass pipeline (see GCNPassConfig::
// addPreISel) to create an instance of this pass.
FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
  return new AMDGPURewriteUndefForPHI();
}
|
|
@ -380,6 +380,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
|||
initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
|
||||
initializeAMDGPULowerModuleLDSPass(*PR);
|
||||
initializeAMDGPURewriteOutArgumentsPass(*PR);
|
||||
initializeAMDGPURewriteUndefForPHIPass(*PR);
|
||||
initializeAMDGPUUnifyMetadataPass(*PR);
|
||||
initializeSIAnnotateControlFlowPass(*PR);
|
||||
initializeAMDGPUReleaseVGPRsPass(*PR);
|
||||
|
@ -1198,6 +1199,10 @@ bool GCNPassConfig::addPreISel() {
|
|||
addPass(createAMDGPUAnnotateUniformValues());
|
||||
if (!LateCFGStructurize) {
|
||||
addPass(createSIAnnotateControlFlowPass());
|
||||
// TODO: Move this right after structurizeCFG to avoid extra divergence
|
||||
// analysis. This depends on stopping SIAnnotateControlFlow from making
|
||||
// control flow modifications.
|
||||
addPass(createAMDGPURewriteUndefForPHIPass());
|
||||
}
|
||||
addPass(createLCSSAPass());
|
||||
|
||||
|
|
|
@ -92,6 +92,7 @@ add_llvm_target(AMDGPUCodeGen
|
|||
AMDGPUReplaceLDSUseWithPointer.cpp
|
||||
AMDGPUResourceUsageAnalysis.cpp
|
||||
AMDGPURewriteOutArguments.cpp
|
||||
AMDGPURewriteUndefForPHI.cpp
|
||||
AMDGPUSetWavePriority.cpp
|
||||
AMDGPUSubtarget.cpp
|
||||
AMDGPUTargetMachine.cpp
|
||||
|
|
|
@ -83,6 +83,9 @@
|
|||
; GCN-O0-NEXT: Memory SSA
|
||||
; GCN-O0-NEXT: AMDGPU Annotate Uniform Values
|
||||
; GCN-O0-NEXT: SI annotate control flow
|
||||
; GCN-O0-NEXT: Post-Dominator Tree Construction
|
||||
; GCN-O0-NEXT: Legacy Divergence Analysis
|
||||
; GCN-O0-NEXT: AMDGPU Rewrite Undef for PHI
|
||||
; GCN-O0-NEXT: LCSSA Verifier
|
||||
; GCN-O0-NEXT: Loop-Closed SSA Form Pass
|
||||
; GCN-O0-NEXT: DummyCGSCCPass
|
||||
|
@ -264,6 +267,9 @@
|
|||
; GCN-O1-NEXT: Memory SSA
|
||||
; GCN-O1-NEXT: AMDGPU Annotate Uniform Values
|
||||
; GCN-O1-NEXT: SI annotate control flow
|
||||
; GCN-O1-NEXT: Post-Dominator Tree Construction
|
||||
; GCN-O1-NEXT: Legacy Divergence Analysis
|
||||
; GCN-O1-NEXT: AMDGPU Rewrite Undef for PHI
|
||||
; GCN-O1-NEXT: LCSSA Verifier
|
||||
; GCN-O1-NEXT: Loop-Closed SSA Form Pass
|
||||
; GCN-O1-NEXT: DummyCGSCCPass
|
||||
|
@ -548,6 +554,9 @@
|
|||
; GCN-O1-OPTS-NEXT: Memory SSA
|
||||
; GCN-O1-OPTS-NEXT: AMDGPU Annotate Uniform Values
|
||||
; GCN-O1-OPTS-NEXT: SI annotate control flow
|
||||
; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
|
||||
; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis
|
||||
; GCN-O1-OPTS-NEXT: AMDGPU Rewrite Undef for PHI
|
||||
; GCN-O1-OPTS-NEXT: LCSSA Verifier
|
||||
; GCN-O1-OPTS-NEXT: Loop-Closed SSA Form Pass
|
||||
; GCN-O1-OPTS-NEXT: DummyCGSCCPass
|
||||
|
@ -840,6 +849,9 @@
|
|||
; GCN-O2-NEXT: Memory SSA
|
||||
; GCN-O2-NEXT: AMDGPU Annotate Uniform Values
|
||||
; GCN-O2-NEXT: SI annotate control flow
|
||||
; GCN-O2-NEXT: Post-Dominator Tree Construction
|
||||
; GCN-O2-NEXT: Legacy Divergence Analysis
|
||||
; GCN-O2-NEXT: AMDGPU Rewrite Undef for PHI
|
||||
; GCN-O2-NEXT: LCSSA Verifier
|
||||
; GCN-O2-NEXT: Loop-Closed SSA Form Pass
|
||||
; GCN-O2-NEXT: Analysis if a function is memory bound
|
||||
|
@ -1147,6 +1159,9 @@
|
|||
; GCN-O3-NEXT: Memory SSA
|
||||
; GCN-O3-NEXT: AMDGPU Annotate Uniform Values
|
||||
; GCN-O3-NEXT: SI annotate control flow
|
||||
; GCN-O3-NEXT: Post-Dominator Tree Construction
|
||||
; GCN-O3-NEXT: Legacy Divergence Analysis
|
||||
; GCN-O3-NEXT: AMDGPU Rewrite Undef for PHI
|
||||
; GCN-O3-NEXT: LCSSA Verifier
|
||||
; GCN-O3-NEXT: Loop-Closed SSA Form Pass
|
||||
; GCN-O3-NEXT: Analysis if a function is memory bound
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-rewrite-undef-for-phi %s | FileCheck -check-prefix=OPT %s
|
||||
|
||||
define amdgpu_ps float @basic(float inreg %c, i32 %x) #0 {
|
||||
; OPT-LABEL: @basic(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
|
||||
; OPT-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
|
||||
; OPT: if:
|
||||
; OPT-NEXT: br label [[END]]
|
||||
; OPT: end:
|
||||
; OPT-NEXT: ret float [[C:%.*]]
|
||||
;
|
||||
entry:
|
||||
%cc = icmp slt i32 %x, 0
|
||||
br i1 %cc, label %if, label %end
|
||||
|
||||
if:
|
||||
br label %end
|
||||
|
||||
end:
|
||||
%c2 = phi float [ undef, %if ], [ %c, %entry ]
|
||||
ret float %c2
|
||||
}
|
||||
|
||||
define amdgpu_ps float @with_uniform_region_inside(float inreg %c, i32 inreg %d, i32 %x) #0 {
|
||||
; OPT-LABEL: @with_uniform_region_inside(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
|
||||
; OPT-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
|
||||
; OPT: if:
|
||||
; OPT-NEXT: [[CC2:%.*]] = icmp slt i32 [[D:%.*]], 0
|
||||
; OPT-NEXT: br i1 [[CC2]], label [[BB2:%.*]], label [[BB3:%.*]]
|
||||
; OPT: bb2:
|
||||
; OPT-NEXT: br label [[END]]
|
||||
; OPT: bb3:
|
||||
; OPT-NEXT: [[CC3:%.*]] = icmp slt i32 [[D]], 2
|
||||
; OPT-NEXT: br i1 [[CC3]], label [[BB4:%.*]], label [[END]]
|
||||
; OPT: bb4:
|
||||
; OPT-NEXT: br label [[END]]
|
||||
; OPT: end:
|
||||
; OPT-NEXT: ret float [[C:%.*]]
|
||||
;
|
||||
entry:
|
||||
%cc = icmp slt i32 %x, 0
|
||||
br i1 %cc, label %if, label %end
|
||||
|
||||
if:
|
||||
%cc2 = icmp slt i32 %d, 0
|
||||
br i1 %cc2, label %bb2, label %bb3
|
||||
|
||||
bb2:
|
||||
br label %end
|
||||
|
||||
bb3:
|
||||
%cc3 = icmp slt i32 %d, 2
|
||||
br i1 %cc3, label %bb4, label %end
|
||||
|
||||
bb4:
|
||||
br label %end
|
||||
|
||||
end:
|
||||
%c2 = phi float [ undef, %bb2 ], [ %c, %bb3 ], [ undef, %bb4 ], [ %c, %entry ]
|
||||
ret float %c2
|
||||
}
|
||||
|
||||
define amdgpu_ps float @exclude_backedge(float inreg %c, i32 %x) #0 {
|
||||
; OPT-LABEL: @exclude_backedge(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
|
||||
; OPT-NEXT: br i1 [[CC]], label [[END:%.*]], label [[LOOP:%.*]]
|
||||
; OPT: loop:
|
||||
; OPT-NEXT: [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
|
||||
; OPT-NEXT: [[C2:%.*]] = phi float [ [[C:%.*]], [[ENTRY]] ], [ undef, [[LOOP]] ]
|
||||
; OPT-NEXT: [[INC]] = add i32 [[IND]], 1
|
||||
; OPT-NEXT: [[LOOP_CC:%.*]] = icmp slt i32 [[INC]], 5
|
||||
; OPT-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[LOOP_END:%.*]]
|
||||
; OPT: loop_end:
|
||||
; OPT-NEXT: br label [[END]]
|
||||
; OPT: end:
|
||||
; OPT-NEXT: [[R:%.*]] = phi float [ [[C2]], [[LOOP_END]] ], [ [[C]], [[ENTRY]] ]
|
||||
; OPT-NEXT: ret float [[R]]
|
||||
;
|
||||
entry:
|
||||
%cc = icmp slt i32 %x, 0
|
||||
br i1 %cc, label %end, label %loop
|
||||
|
||||
loop:
|
||||
%ind = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%c2 = phi float [ %c, %entry ], [ undef, %loop ]
|
||||
%inc = add i32 %ind, 1
|
||||
%loop_cc = icmp slt i32 %inc, 5
|
||||
br i1 %loop_cc, label %loop, label %loop_end
|
||||
|
||||
loop_end:
|
||||
br label %end
|
||||
|
||||
end:
|
||||
%r = phi float [ %c2, %loop_end ], [ %c, %entry ]
|
||||
ret float %r
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind noinline }
|
|
@ -0,0 +1,52 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
|
||||
;
|
||||
; This test shows a typical case that a PHI(%c2) in join block was treated as uniform
|
||||
; as it has one unique uniform incoming value plus one additional undef incoming
|
||||
; value. This case might suffer from correctness issue if %c2 was assigned a scalar
|
||||
; register but meanwhile dead in %if. The problem is solved by replacing the %undef
|
||||
; with %c (thus replacing %c2 with %c in this example).
|
||||
|
||||
|
||||
define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, i32 %y) #0 {
|
||||
; GCN-LABEL: uniform_phi_with_undef:
|
||||
; GCN: ; %bb.0: ; %entry
|
||||
; GCN-NEXT: v_cmp_lt_i32_e64 s2, v2, v1
|
||||
; GCN-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GCN-NEXT: s_and_b32 s2, s1, s2
|
||||
; GCN-NEXT: s_mov_b32 exec_lo, s2
|
||||
; GCN-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GCN-NEXT: ; %bb.1: ; %if
|
||||
; GCN-NEXT: s_mov_b32 s2, 2.0
|
||||
; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0
|
||||
; GCN-NEXT: v_rcp_f32_e64 v2, v1
|
||||
; GCN-NEXT: s_mov_b32 s3, 1.0
|
||||
; GCN-NEXT: v_fma_f32 v3, -v1, v2, s3
|
||||
; GCN-NEXT: v_fmac_f32_e64 v2, v3, v2
|
||||
; GCN-NEXT: v_div_scale_f32 v3, vcc_lo, v0, s2, v0
|
||||
; GCN-NEXT: v_mul_f32_e64 v4, v3, v2
|
||||
; GCN-NEXT: v_fma_f32 v5, -v1, v4, v3
|
||||
; GCN-NEXT: v_fmac_f32_e64 v4, v5, v2
|
||||
; GCN-NEXT: v_fma_f32 v1, -v1, v4, v3
|
||||
; GCN-NEXT: v_div_fmas_f32 v1, v1, v2, v4
|
||||
; GCN-NEXT: v_div_fixup_f32 v0, v1, s2, v0
|
||||
; GCN-NEXT: .LBB0_2: ; %end
|
||||
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s1
|
||||
; GCN-NEXT: v_add_f32_e64 v0, v0, s0
|
||||
; GCN-NEXT: ; return to shader part epilog
|
||||
entry:
|
||||
%cc = icmp slt i32 %y, %x
|
||||
br i1 %cc, label %if, label %end
|
||||
|
||||
if:
|
||||
%v.if = fdiv float %v, 2.0
|
||||
br label %end
|
||||
|
||||
end:
|
||||
%v2 = phi float [ %v.if, %if ], [ %v, %entry ]
|
||||
%c2 = phi float [ undef, %if ], [ %c, %entry ]
|
||||
%r = fadd float %v2, %c2
|
||||
ret float %r
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind optnone noinline }
|
Loading…
Reference in New Issue