Revert "[LoopVectorize][AArch64] Enable ordered reductions by default for AArch64"

This reverts commit f4122398e7 to investigate a crash exposed by it.

The patch breaks building the code below with `clang -O2 --target=aarch64-linux`:

    int a; double b, c; void d() { for (; a; a++) { b += c; c = a; } }
2021-08-20 21:22:59 +01:00 · 2021-08-20 21:22:59 +01:00 · ab9296f13b
parent 5ca7131eb3
commit ab9296f13b
7 changed files with 6 additions and 26 deletions
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@ -662,9 +662,6 @@ public:
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

-  /// Return true if we should be enabling ordered reductions for the target.
-  bool enableOrderedReductions() const;
-
  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
@ -1511,7 +1508,6 @@ public:
  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
-  virtual bool enableOrderedReductions() = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
@ -1894,9 +1890,6 @@ public:
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
-  bool enableOrderedReductions() override {
-    return Impl.enableOrderedReductions();
-  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@ -263,8 +263,6 @@ public:

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

-  bool enableOrderedReductions() const { return false; }
-
  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@ -410,10 +410,6 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }

-bool TargetTransformInfo::enableOrderedReductions() const {
-  return TTIImpl->enableOrderedReductions();
-}
-
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@ -299,8 +299,6 @@ public:
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

-  bool enableOrderedReductions() const { return true; }
-
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -331,7 +331,7 @@ static cl::opt<bool>
                           cl::desc("Prefer in-loop vector reductions, "
                                    "overriding the targets preference."));

-static cl::opt<bool> ForceOrderedReductions(
+cl::opt<bool> ForceOrderedReductions(
    "force-ordered-reductions", cl::init(false), cl::Hidden,
    cl::desc("Enable the vectorisation of loops with in-order (strict) "
             "FP reductions"));
@ -1317,7 +1317,8 @@ public:
  /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
  /// of FP operations.
  bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
-    return !Hints->allowReordering() && RdxDesc.isOrdered();
+    return ForceOrderedReductions && !Hints->allowReordering() &&
+           RdxDesc.isOrdered();
  }

  /// \returns The smallest bitwidth each instruction can be represented with.
@ -10224,13 +10225,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    return false;
  }

-  bool AllowOrderedReductions;
-  // If the flag is set, use that instead and override the TTI behaviour.
-  if (ForceOrderedReductions.getNumOccurrences() > 0)
-    AllowOrderedReductions = ForceOrderedReductions;
-  else
-    AllowOrderedReductions = TTI->enableOrderedReductions();
-  if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
+  if (!LVL.canVectorizeFPMath(ForceOrderedReductions)) {
    ORE->emit([&]() {
      auto *ExactFPMathInst = Requirements.getExactFPInst();
      return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@ -2,7 +2,7 @@
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true  -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
-; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED

 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@ -2,7 +2,7 @@
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=false -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true  -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
-; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED

 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-ORDERED-LABEL: @fadd_strict