[SLP] Fix crash in reduction for integer min/max

The SCEV commit b46c085d2b ("[NFCI] SCEVExpander: emit intrinsics for integral {u,s}{min,max} SCEV expressions") seems to have revealed a new crash in SLPVectorizer. SLP crashes because it expects a SelectInst as an externally used value, but a umin() call is found instead. This patch relaxes that assumption to make the IR flag propagation safe. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D99328
2021-03-25 21:32:55 +07:00 · 2021-03-25 21:32:55 +07:00 · f7ef26ef0b
parent 02d7ef3181
commit f7ef26ef0b
2 changed files with 35 additions and 6 deletions
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -6629,10 +6629,9 @@ class HorizontalReduction {
                         Value *RHS, const Twine &Name, Instruction *I) {
    Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name);
    if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
-      if (auto *Sel = dyn_cast<SelectInst>(Op)) {
-        propagateIRFlags(Sel->getCondition(),
-                         cast<SelectInst>(I)->getCondition());
-      }
+      if (auto *Sel = dyn_cast<SelectInst>(Op))
+        if (auto *SelI = dyn_cast<SelectInst>(I))
+          propagateIRFlags(Sel->getCondition(), SelI->getCondition());
    }
    propagateIRFlags(Op, I);
    return Op;
--- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -slp-vectorizer -S < %s 2>&1 | FileCheck %s
-; REQUIRES: asserts
+; RUN: opt -slp-vectorizer -S < %s | FileCheck %s

 ; Given LLVM IR caused associative reduction matching routine crash in SLP.
 ; The routines begins with select as integer Umax reduction kind
@ -37,3 +36,34 @@ next:

 declare i8 @llvm.umax.i8(i8, i8)

+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+
+; The following LLVM IR caused a crash in SLP.
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp ult i32 [[TMP2]], 77
+; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP2]], i32 77
+; CHECK-NEXT:    [[E:%.*]] = icmp ugt i32 [[OP_EXTRA1]], 1
+; CHECK-NEXT:    ret void
+;
+entry:
+  %smin0 = call i32 @llvm.smin.i32(i32 undef, i32 0)
+  %smin1 = call i32 @llvm.smin.i32(i32 undef, i32 1)
+  %smin2 = call i32 @llvm.smin.i32(i32 undef, i32 2)
+  %smin3 = call i32 @llvm.smin.i32(i32 undef, i32 3)
+  %a = sub nsw i32 undef, %smin0
+  %b = sub nsw i32 undef, %smin1
+  %c = sub nsw i32 undef, %smin2
+  %d = sub nsw i32 undef, %smin3
+  %umin0 = call i32 @llvm.umin.i32(i32 %d, i32 %c)
+  %umin1 = call i32 @llvm.umin.i32(i32 %umin0, i32 %b)
+  %umin2 = call i32 @llvm.umin.i32(i32 %umin1, i32 %a)
+  %umin3 = call i32 @llvm.umin.i32(i32 %umin2, i32 77)
+  %e = icmp ugt i32 %umin3, 1
+  ret void
+}