forked from OSchip/llvm-project
SROA: Enhance speculateSelectInstLoads
Allow the folding even if there is an intervening bitcast. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D106667
This commit is contained in:
parent
a00aafc30d
commit
ffc3fb665d
|
@ -1330,14 +1330,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
|
||||||
/// %V = select i1 %cond, i32 %V1, i32 %V2
|
/// %V = select i1 %cond, i32 %V1, i32 %V2
|
||||||
///
|
///
|
||||||
/// We can do this to a select if its only uses are loads and if the operand
|
/// We can do this to a select if its only uses are loads and if the operand
|
||||||
/// to the select can be loaded unconditionally.
|
/// to the select can be loaded unconditionally. If the select is used through
|
||||||
|
/// an intervening bitcast whose only use is the load, the promotion is allowed.
|
||||||
static bool isSafeSelectToSpeculate(SelectInst &SI) {
|
static bool isSafeSelectToSpeculate(SelectInst &SI) {
|
||||||
Value *TValue = SI.getTrueValue();
|
Value *TValue = SI.getTrueValue();
|
||||||
Value *FValue = SI.getFalseValue();
|
Value *FValue = SI.getFalseValue();
|
||||||
const DataLayout &DL = SI.getModule()->getDataLayout();
|
const DataLayout &DL = SI.getModule()->getDataLayout();
|
||||||
|
|
||||||
for (User *U : SI.users()) {
|
for (User *U : SI.users()) {
|
||||||
LoadInst *LI = dyn_cast<LoadInst>(U);
|
LoadInst *LI;
|
||||||
|
BitCastInst *BC = dyn_cast<BitCastInst>(U);
|
||||||
|
if (BC && BC->hasOneUse())
|
||||||
|
LI = dyn_cast<LoadInst>(*BC->user_begin());
|
||||||
|
else
|
||||||
|
LI = dyn_cast<LoadInst>(U);
|
||||||
|
|
||||||
if (!LI || !LI->isSimple())
|
if (!LI || !LI->isSimple())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -1363,10 +1370,24 @@ static void speculateSelectInstLoads(SelectInst &SI) {
|
||||||
Value *FV = SI.getFalseValue();
|
Value *FV = SI.getFalseValue();
|
||||||
// Replace the loads of the select with a select of two loads.
|
// Replace the loads of the select with a select of two loads.
|
||||||
while (!SI.use_empty()) {
|
while (!SI.use_empty()) {
|
||||||
LoadInst *LI = cast<LoadInst>(SI.user_back());
|
LoadInst *LI;
|
||||||
|
BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
|
||||||
|
if (BC) {
|
||||||
|
assert(BC->hasOneUse() && "Bitcast should have a single use.");
|
||||||
|
LI = cast<LoadInst>(BC->user_back());
|
||||||
|
} else {
|
||||||
|
LI = cast<LoadInst>(SI.user_back());
|
||||||
|
}
|
||||||
|
|
||||||
assert(LI->isSimple() && "We only speculate simple loads");
|
assert(LI->isSimple() && "We only speculate simple loads");
|
||||||
|
|
||||||
IRB.SetInsertPoint(LI);
|
IRB.SetInsertPoint(LI);
|
||||||
|
if (BC) {
|
||||||
|
// Cast the operands to bitcast's target type.
|
||||||
|
TV = IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast");
|
||||||
|
FV = IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast");
|
||||||
|
}
|
||||||
|
|
||||||
LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
|
LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
|
||||||
LI->getName() + ".sroa.speculate.load.true");
|
LI->getName() + ".sroa.speculate.load.true");
|
||||||
LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
|
LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
|
||||||
|
@ -1390,6 +1411,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
|
||||||
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
|
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
|
||||||
LI->replaceAllUsesWith(V);
|
LI->replaceAllUsesWith(V);
|
||||||
LI->eraseFromParent();
|
LI->eraseFromParent();
|
||||||
|
if (BC)
|
||||||
|
BC->eraseFromParent();
|
||||||
}
|
}
|
||||||
SI.eraseFromParent();
|
SI.eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,23 +60,14 @@ entry:
|
||||||
ret i32 %result
|
ret i32 %result
|
||||||
}
|
}
|
||||||
|
|
||||||
; If bitcast isn't considered a safe phi/select use, the alloca
|
|
||||||
; remains as an array.
|
|
||||||
; FIXME: Why isn't this identical to test2?
|
|
||||||
define float @test2_bitcast() {
|
define float @test2_bitcast() {
|
||||||
; CHECK-LABEL: @test2_bitcast(
|
; CHECK-LABEL: @test2_bitcast(
|
||||||
; CHECK-NEXT: entry:
|
; CHECK-NEXT: entry:
|
||||||
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
|
; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1
|
||||||
; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i32, align 4
|
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 1 to float
|
||||||
; CHECK-NEXT: store i32 0, i32* [[A_SROA_0]], align 4
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 0 to float
|
||||||
; CHECK-NEXT: store i32 1, i32* [[A_SROA_3]], align 4
|
; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], float [[TMP0]], float [[TMP1]]
|
||||||
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_V0:%.*]] = load i32, i32* [[A_SROA_0]], align 4
|
; CHECK-NEXT: ret float [[RESULT_SROA_SPECULATED]]
|
||||||
; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_4_V1:%.*]] = load i32, i32* [[A_SROA_3]], align 4
|
|
||||||
; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 [[A_SROA_0_0_A_SROA_0_0_V0]], [[A_SROA_3_0_A_SROA_3_4_V1]]
|
|
||||||
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], i32* [[A_SROA_3]], i32* [[A_SROA_0]]
|
|
||||||
; CHECK-NEXT: [[SELECT_BC:%.*]] = bitcast i32* [[SELECT]] to float*
|
|
||||||
; CHECK-NEXT: [[RESULT:%.*]] = load float, float* [[SELECT_BC]], align 4
|
|
||||||
; CHECK-NEXT: ret float [[RESULT]]
|
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
%a = alloca [2 x i32]
|
%a = alloca [2 x i32]
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -S -sroa < %s | FileCheck %s
|
||||||
|
|
||||||
|
%st.half = type { half }
|
||||||
|
|
||||||
|
; Allow speculateSelectInstLoads to fold load and select
|
||||||
|
; even if there is an intervening bitcast.
|
||||||
|
define <2 x i16> @test_load_bitcast_select(i1 %cond1, i1 %cond2) {
|
||||||
|
; CHECK-LABEL: @test_load_bitcast_select(
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half 0xHFFFF to i16
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast half 0xH0000 to i16
|
||||||
|
; CHECK-NEXT: [[LD1_SROA_SPECULATED:%.*]] = select i1 [[COND1:%.*]], i16 [[TMP1]], i16 [[TMP2]]
|
||||||
|
; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x i16> undef, i16 [[LD1_SROA_SPECULATED]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast half 0xHFFFF to i16
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast half 0xH0000 to i16
|
||||||
|
; CHECK-NEXT: [[LD2_SROA_SPECULATED:%.*]] = select i1 [[COND2:%.*]], i16 [[TMP3]], i16 [[TMP4]]
|
||||||
|
; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x i16> [[V1]], i16 [[LD2_SROA_SPECULATED]], i32 1
|
||||||
|
; CHECK-NEXT: ret <2 x i16> [[V2]]
|
||||||
|
;
|
||||||
|
%true = alloca half, align 2
|
||||||
|
%false = alloca half, align 2
|
||||||
|
store half 0xHFFFF, half* %true, align 2
|
||||||
|
store half 0xH0000, half* %false, align 2
|
||||||
|
%false.cast = bitcast half* %false to %st.half*
|
||||||
|
%true.cast = bitcast half* %true to %st.half*
|
||||||
|
%sel1 = select i1 %cond1, %st.half* %true.cast, %st.half* %false.cast
|
||||||
|
%cast1 = bitcast %st.half* %sel1 to i16*
|
||||||
|
%ld1 = load i16, i16* %cast1, align 2
|
||||||
|
%v1 = insertelement <2 x i16> undef, i16 %ld1, i32 0
|
||||||
|
%sel2 = select i1 %cond2, %st.half* %true.cast, %st.half* %false.cast
|
||||||
|
%cast2 = bitcast %st.half* %sel2 to i16*
|
||||||
|
%ld2 = load i16, i16* %cast2, align 2
|
||||||
|
%v2 = insertelement <2 x i16> %v1, i16 %ld2, i32 1
|
||||||
|
ret <2 x i16> %v2
|
||||||
|
}
|
Loading…
Reference in New Issue