[InstCombine] Optimize select(freeze(icmp eq/ne x, y), x, y)

This patch adds an optimization that folds select(freeze(icmp eq/ne x, y), x, y)
to x or y.
This is needed to resolve a slowdown that appears once D84940 is applied.

I tried to bake this logic into foldSelectInstWithICmp, but it wasn't clear.
This patch conservatively writes the pattern in a separate function,
foldSelectWithFrozenICmp.

The output does not need freeze; https://alive2.llvm.org/ce/z/X49hNE (from @nikic)

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D85533
This commit is contained in:
Juneyoung Lee 2020-08-07 21:12:52 +09:00
parent 5d59385ba6
commit b6d9add71b
2 changed files with 48 additions and 8 deletions

View File

@ -2526,6 +2526,32 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
return nullptr;
}
/// Fold a select whose condition is a frozen equality compare of its own arms:
///
///   select (freeze(x == y)), x, y --> y
///   select (freeze(x != y)), x, y --> x
///
/// The compare operands may appear in either order (commutative match).
///
/// \param Sel      the select instruction to inspect.
/// \param Builder  not used by this fold; no new instructions are created.
/// \returns the arm that replaces the select, or nullptr when the pattern does
///          not apply.
static Value *foldSelectWithFrozenICmp(SelectInst &Sel,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Frozen = dyn_cast<FreezeInst>(Sel.getCondition());
  if (!Frozen)
    return nullptr;

  // The freeze must feed this select and nothing else. If it had other users,
  // they could observe a value that contradicts the folded result:
  //   c = freeze(x == y)   ; suppose y = poison and x = 42; c is 0 or 1
  //   a = select c, x, y
  //   f(a, c)              ; f(poison, 1) is impossible before the fold, but
  //                        ; becomes possible once a is replaced by y.
  if (!Frozen->hasOneUse())
    return nullptr;

  Value *OnTrue = Sel.getTrueValue();
  Value *OnFalse = Sel.getFalseValue();
  Value *FrozenOp = Frozen->getOperand(0);

  CmpInst::Predicate Pred;
  if (!match(FrozenOp,
             m_c_ICmp(Pred, m_Specific(OnTrue), m_Specific(OnFalse))))
    return nullptr;

  // Only the equality predicates fold; every other predicate is left alone.
  if (Pred == ICmpInst::ICMP_EQ)
    return OnFalse;
  if (Pred == ICmpInst::ICMP_NE)
    return OnTrue;
  return nullptr;
}
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@ -2977,5 +3003,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
return replaceInstUsesWith(SI, PN);
if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
return replaceInstUsesWith(SI, Fr);
return nullptr;
}

View File

@ -2540,10 +2540,7 @@ define void @cond_freeze_multipleuses(i8 %x, i8 %y) {
define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) {
; CHECK-LABEL: @select_freeze_icmp_eq(
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
; CHECK-NEXT: ret i32 [[V]]
; CHECK-NEXT: ret i32 [[Y:%.*]]
;
%c = icmp eq i32 %x, %y
%c.fr = freeze i1 %c
@ -2553,10 +2550,7 @@ define i32 @select_freeze_icmp_eq(i32 %x, i32 %y) {
define i32 @select_freeze_icmp_ne(i32 %x, i32 %y) {
; CHECK-LABEL: @select_freeze_icmp_ne(
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
; CHECK-NEXT: ret i32 [[V]]
; CHECK-NEXT: ret i32 [[X:%.*]]
;
%c = icmp ne i32 %x, %y
%c.fr = freeze i1 %c
@ -2576,3 +2570,20 @@ define i32 @select_freeze_icmp_else(i32 %x, i32 %y) {
%v = select i1 %c.fr, i32 %x, i32 %y
ret i32 %v
}
declare void @use_i1_i32(i1, i32)
; Negative test: the frozen compare %c.fr is used by both the select and the
; call to @use_i1_i32, so the select must be left in place (the CHECK lines
; expect the icmp, freeze, and select to all survive).
define void @select_freeze_icmp_multuses(i32 %x, i32 %y) {
; CHECK-LABEL: @select_freeze_icmp_multuses(
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[C_FR:%.*]] = freeze i1 [[C]]
; CHECK-NEXT: [[V:%.*]] = select i1 [[C_FR]], i32 [[X]], i32 [[Y]]
; CHECK-NEXT: call void @use_i1_i32(i1 [[C_FR]], i32 [[V]])
; CHECK-NEXT: ret void
;
%c = icmp ne i32 %x, %y
%c.fr = freeze i1 %c
%v = select i1 %c.fr, i32 %x, i32 %y
call void @use_i1_i32(i1 %c.fr, i32 %v)
ret void
}