From a5b07a221a5772c0d3733a0bc8ff0b57dd5705de Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <kai.wang@sifive.com>
Date: Wed, 27 Jan 2021 15:00:46 +0800
Subject: [PATCH] [RISCV] Initial support of LoopVectorizer for RISC-V Vector.

Define an option, -riscv-vector-bits-max, to specify the maximum vector
register size in bits. The loop vectorizer uses this value to check whether
it is safe to vectorize a loop using whole vector registers.

This is not the optimal solution for vectorizing loops with scalable vectors.
It assumes that whole vector registers will be used to vectorize the code.
Where possible, we should configure vl to vectorize the loop instead of
using whole vector registers.

We only consider LMUL = 1 in this patch.

This patch is just initial work on the loop vectorizer for RISC-V Vector.

Differential Revision: https://reviews.llvm.org/D95659
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 16 ++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.h     | 12 ++++++
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp      | 10 +++++
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  1 +
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 14 +++++++
 .../Target/RISCV/RISCVTargetTransformInfo.h   |  2 +
 .../LoopVectorize/RISCV/lit.local.cfg         |  4 ++
 .../LoopVectorize/RISCV/scalable-vf-hint.ll   | 37 +++++++++++++++++++
 8 files changed, 96 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a5c4d1b53b1..cdb888b873ab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5102,6 +5102,22 @@ bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
   return true;
 }
 
+bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
+    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
+    bool *Fast) const {
+  // Anything that is not a scalable vector type is rejected outright.
+  if (!VT.isScalableVector())
+    return false;
+
+  // The access must be aligned to at least the store size of one element.
+  if (!(Alignment >= VT.getVectorElementType().getStoreSize()))
+    return false;
+
+  if (Fast)
+    *Fast = true;
+  return true;
+}
+
 #define GET_REGISTER_MATCHER
 #include "RISCVGenAsmMatcher.inc"
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 423c64f3fe40..5b8987c23e4b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -340,6 +340,13 @@ public:
                                           Value *NewVal, Value *Mask,
                                           AtomicOrdering Ord) const override;
 
+  /// Returns true if the target allows a misaligned memory access of type
+  /// \p VT; when it does and \p Fast is non-null, \p *Fast is set to true.
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *Fast = nullptr) const override;
+
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -394,6 +401,11 @@ private:
   bool useRVVForFixedLengthVectorVT(MVT VT) const;
 };
 
+namespace RISCV {
+// Number of bits used as the known part of the scalable vector types.
+static constexpr unsigned RVVBitsPerBlock = 64;
+} // namespace RISCV
+
 namespace RISCVVIntrinsicsTable {
 
 struct RISCVVIntrinsicInfo {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 48bcd489b120..890bf498179a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -39,6 +39,11 @@ static cl::opt<unsigned> RVVVectorLMULMax(
              "Fractional LMUL values are not supported."),
     cl::init(8), cl::Hidden);
 
+static cl::opt<unsigned> VectorBitsMax(
+    "riscv-vector-bits-max",
+    cl::desc("Assume RISC-V vector registers are at most this big"),
+    cl::init(0), cl::Hidden); // 0 (default): no maximum was specified.
+
 void RISCVSubtarget::anchor() {}
 
 RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
@@ -62,6 +67,11 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
   return *this;
 }
 
+unsigned RISCVSubtarget::getMaxVectorSizeInBits() const {
+  assert(HasStdExtV && "Tried to get vector length without V support!");
+  return VectorBitsMax; // Value of -riscv-vector-bits-max; 0 by default.
+}
+
 RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
                                StringRef TuneCPU, StringRef FS,
                                StringRef ABIName, const TargetMachine &TM)
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 70b555cda1bd..d6a63a04892a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -134,6 +134,7 @@ public:
     assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
     return UserReservedRegister[i];
   }
+  unsigned getMaxVectorSizeInBits() const; // 0 if no maximum was specified.
 
 protected:
   // GlobalISel related APIs.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e56a1cb4252e..8204a65d8080 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -120,3 +120,17 @@ bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
     return true;
   }
 }
+
+Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
+  // The V specification puts no upper bound on the vector length, so the only
+  // bound available is the one the user supplies (0 means none was given).
+  // Only a single vector register (LMUL = 1) is considered for vectorization.
+  // Check for V before querying the subtarget: getMaxVectorSizeInBits()
+  // asserts that the V extension is enabled.
+  if (!ST->hasStdExtV())
+    return BaseT::getMaxVScale();
+  if (unsigned MaxVectorSizeInBits = ST->getMaxVectorSizeInBits())
+    return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
+  // No usable bound: defer to the generic implementation.
+  return BaseT::getMaxVScale();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index cf37dc9a0aea..5177f8165646 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -49,6 +49,8 @@ public:
                           Type *Ty, TTI::TargetCostKind CostKind);
 
   bool shouldExpandReduction(const IntrinsicInst *II) const;
+  bool supportsScalableVectors() const { return ST->hasStdExtV(); }
+  Optional<unsigned> getMaxVScale() const; // Uses -riscv-vector-bits-max.
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg
new file mode 100644
index 000000000000..d82490b75940
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll']
+# Mark this directory unsupported unless 'RISCV' is in config.root.targets.
+if 'RISCV' not in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
new file mode 100644
index 000000000000..2c5631110f90
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -0,0 +1,37 @@
+; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \
+; RUN:   -riscv-vector-bits-max=512 -S < %s 2>&1 \
+; RUN:   | FileCheck %s
+
+; void test(int *a, int *b) {
+;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
+;   for (int i=0; i<1024; ++i) {
+;     a[i + 64] = a[i] + b[i];
+;   }
+; }
+;
+; CHECK: <vscale x 2 x i32>
+define void @test(i32* %a, i32* %b) {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %2 = add nuw nsw i64 %iv, 64 ; store index is 64 elements past the load
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024 ; fixed trip count of 1024
+  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6 ; width 2, scalable
+
+exit:
+  ret void
+}
+
+!6 = !{!6, !7, !8}
+!7 = !{!"llvm.loop.vectorize.width", i32 2}
+!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}