[NARY] Don't optimize min/max if there are side uses (part2)

The previous attempt to fix infinite recursion in min/max reassociation (D100170) was not fully successful. The newly discovered failing case occurs because the situation where there is a single use is not handled properly. It should be processed separately from the two-use case.

Reviewed By: mkazantsev

Differential Revision: https://reviews.llvm.org/D101359
This commit is contained in:
Evgeniy Brevnov 2021-04-27 20:15:05 +07:00
parent 76f84e7729
commit 7861cb600c
2 changed files with 44 additions and 4 deletions

View File

@ -585,6 +585,11 @@ template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
return scUnknown;
}
// Parameters:
// I - instruction matched by MaxMinMatch matcher
// MaxMinMatch - min/max idiom matcher
// LHS - first operand of I
// RHS - second operand of I
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
MaxMinT MaxMinMatch,
@ -612,9 +617,10 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
// The optimization is profitable only if LHS can be removed in the end.
// In other words LHS should be used (directly or indirectly) by I only.
for (User *U : LHS->users())
if (U != I || !(U->hasOneUser() && *U->users().begin() == I))
continue;
if (llvm::any_of(LHS->users(), [&](auto *U) {
return U != I && !(U->hasOneUser() && *U->users().begin() == I);
}))
continue;
SCEVExpander Expander(*SE, *DL, "nary-reassociate");
SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };

View File

@ -3,9 +3,10 @@
; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
declare i32 @llvm.smax.i32(i32 %a, i32 %b)
declare i64 @llvm.umin.i64(i64, i64)
; This is a negative test. We should not optimize if intermediate result
; has a use outside of optimizaple pattern. In other words %smax2 has one
; has a use outside of optimizable pattern. In other words %smax2 has one
; use from %smax3 and side use from %res2.
define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @smax_test1(
@ -30,3 +31,36 @@ define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
ret i32 %res
}
; This is a negative test. It is similar to the previous one
; but a bit more complex. In particular, after the first iteration
; %e10 is replaced with %e10.nary = call i64 @llvm.umin.i64(i64 %e5, i64 %e).
; No more reassociation should be applied to %e10.nary since
; %e5 has a side use in %e6.
define void @test2(i64 %arg) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[E:%.*]] = sub i64 undef, 0
; CHECK-NEXT: [[E1:%.*]] = sub i64 [[ARG:%.*]], 0
; CHECK-NEXT: [[E2:%.*]] = call i64 @llvm.umin.i64(i64 [[E]], i64 [[E1]])
; CHECK-NEXT: [[E3:%.*]] = call i64 @llvm.umin.i64(i64 [[E2]], i64 16384)
; CHECK-NEXT: [[E4:%.*]] = sub i64 [[ARG]], 0
; CHECK-NEXT: [[E5:%.*]] = call i64 @llvm.umin.i64(i64 [[E4]], i64 16384)
; CHECK-NEXT: [[E6:%.*]] = icmp ugt i64 [[E5]], 0
; CHECK-NEXT: [[E10_NARY:%.*]] = call i64 @llvm.umin.i64(i64 [[E5]], i64 [[E]])
; CHECK-NEXT: unreachable
;
bb:
; First umin chain: %e3 = umin(umin(%e, %e1), 16384).
%e = sub i64 undef, 0
%e1 = sub i64 %arg, 0
%e2 = call i64 @llvm.umin.i64(i64 %e, i64 %e1)
%e3 = call i64 @llvm.umin.i64(i64 %e2, i64 16384)
; %e5 = umin(%e4, 16384). The icmp %e6 is a non-min/max user of %e5,
; i.e. the "side use" of %e5.
%e4 = sub i64 %arg, 0
%e5 = call i64 @llvm.umin.i64(i64 %e4, i64 16384)
%e6 = icmp ugt i64 %e5, 0
; Second umin chain with the same shape as %e..%e3. In the expected output
; above, the only line between the %e6 compare and the terminator is a
; single call, umin(%e5, %e), captured as E10_NARY.
%e7 = sub i64 undef, 0
%e8 = sub i64 %arg, 0
%e9 = call i64 @llvm.umin.i64(i64 %e7, i64 %e8)
%e10 = call i64 @llvm.umin.i64(i64 %e9, i64 16384)
unreachable
}