[mlir][LLVMIR] Use a new way to verify GEPOp indices

Previously, GEPOp relies on `findKnownStructIndices` to check if a GEP index should be static. The truth is, `findKnownStructIndices` can only tell you a GEP index _might_ be indexing into a struct (which should use a static GEP index). But GEPOp::build and GEPOp::verify are falsely taking this information as a certain answer, which creates many false alarms like the one depicted in `test/Target/LLVMIR/Import/dynamic-gep-index.ll`. The solution presented here adopts a new verification scheme: When we're recursively checking the child element types of a struct type, instead of checking every child types, we only check the one dictated by the (static) GEP index value. We also combine "refinement" logics -- refine/promote struct index mlir::Value into constants -- into the very verification process since they have lots of logics in common. The resulting code is more concise and less brittle. We also hide GEPOp::findKnownStructIndices since most of the aforementioned logics are already encapsulated within GEPOp::build and GEPOp::verify, we found little reason for findKnownStructIndices (or the new findStructIndices) to be public. Differential Revision: https://reviews.llvm.org/D124935
2022-04-20 17:46:39 -07:00 · 2022-04-20 17:46:39 -07:00 · 0b168a49bf
parent e1cf702a02
commit 0b168a49bf
5 changed files with 171 additions and 121 deletions
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@ -334,16 +334,6 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [NoSideEffect]> {

    /// Returns the type pointed to by the pointer argument of this GEP.
    Type getSourceElementType();
-
-    /// Populates `indices` with positions of GEP indices that correspond to
-    /// LLVMStructTypes potentially nested in the given `sourceElementType`,
-    /// which is the type pointed to by the pointer argument of a GEP. If
-    /// `structSizes` is provided, it is populated with sizes of the indexed
-    /// structs for bounds verification purposes.
-    static void findKnownStructIndices(
-        Type sourceElementType, SmallVectorImpl<unsigned> &indices,
-        SmallVectorImpl<unsigned> *structSizes = nullptr);
-
  }];
  let hasFolder = 1;
  let hasVerifier = 1;
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@ -29,6 +29,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/SourceMgr.h"

@ -398,55 +399,139 @@ SuccessorOperands SwitchOp::getSuccessorOperands(unsigned index) {

 constexpr int GEPOp::kDynamicIndex;

-/// Populates `indices` with positions of GEP indices that would correspond to
-/// LLVMStructTypes potentially nested in the given type. The type currently
-/// visited gets `currentIndex` and LLVM container types are visited
-/// recursively. The recursion is bounded and takes care of recursive types by
-/// means of the `visited` set.
-static void recordStructIndices(Type type, unsigned currentIndex,
-                                SmallVectorImpl<unsigned> &indices,
-                                SmallVectorImpl<unsigned> *structSizes,
-                                SmallPtrSet<Type, 4> &visited) {
-  if (visited.contains(type))
-    return;
+namespace {
+/// Base class for llvm::Error related to GEP index.
+class GEPIndexError : public llvm::ErrorInfo<GEPIndexError> {
+protected:
+  unsigned indexPos;

-  visited.insert(type);
+public:
+  static char ID;

-  llvm::TypeSwitch<Type>(type)
-      .Case<LLVMStructType>([&](LLVMStructType structType) {
-        indices.push_back(currentIndex);
-        if (structSizes)
-          structSizes->push_back(structType.getBody().size());
-        for (Type elementType : structType.getBody())
-          recordStructIndices(elementType, currentIndex + 1, indices,
-                              structSizes, visited);
+  std::error_code convertToErrorCode() const override {
+    return llvm::inconvertibleErrorCode();
+  }
+
+  explicit GEPIndexError(unsigned pos) : indexPos(pos) {}
+};
+
+/// llvm::Error for out-of-bound GEP index.
+struct GEPIndexOutOfBoundError
+    : public llvm::ErrorInfo<GEPIndexOutOfBoundError, GEPIndexError> {
+  static char ID;
+
+  using ErrorInfo::ErrorInfo;
+
+  void log(llvm::raw_ostream &os) const override {
+    os << "index " << indexPos << " indexing a struct is out of bounds";
+  }
+};
+
+/// llvm::Error for non-static GEP index indexing a struct.
+struct GEPStaticIndexError
+    : public llvm::ErrorInfo<GEPStaticIndexError, GEPIndexError> {
+  static char ID;
+
+  using ErrorInfo::ErrorInfo;
+
+  void log(llvm::raw_ostream &os) const override {
+    os << "expected index " << indexPos << " indexing a struct "
+       << "to be constant";
+  }
+};
+} // end anonymous namespace
+
+char GEPIndexError::ID = 0;
+char GEPIndexOutOfBoundError::ID = 0;
+char GEPStaticIndexError::ID = 0;
+
+/// For the given `structIndices` and `indices`, check if they're complied
+/// with `baseGEPType`, especially check against LLVMStructTypes nested within,
+/// and refine/promote struct index from `indices` to `updatedStructIndices`
+/// if the latter argument is not null.
+static llvm::Error
+recordStructIndices(Type baseGEPType, unsigned indexPos,
+                    ArrayRef<int32_t> structIndices, ValueRange indices,
+                    SmallVectorImpl<int32_t> *updatedStructIndices,
+                    SmallVectorImpl<Value> *remainingIndices) {
+  if (indexPos >= structIndices.size())
+    // Stop searching
+    return llvm::Error::success();
+
+  int32_t gepIndex = structIndices[indexPos];
+  bool isStaticIndex = gepIndex != GEPOp::kDynamicIndex;
+
+  unsigned dynamicIndexPos = indexPos;
+  if (!isStaticIndex)
+    dynamicIndexPos = llvm::count(structIndices.take_front(indexPos + 1),
+                                  LLVM::GEPOp::kDynamicIndex) - 1;
+
+  return llvm::TypeSwitch<Type, llvm::Error>(baseGEPType)
+      .Case<LLVMStructType>([&](LLVMStructType structType) -> llvm::Error {
+        // We don't always want to refine the index (e.g. when performing
+        // verification), so we only refine when updatedStructIndices is not
+        // null.
+        if (!isStaticIndex && updatedStructIndices) {
+          // Try to refine.
+          APInt staticIndexValue;
+          isStaticIndex = matchPattern(indices[dynamicIndexPos],
+                                       m_ConstantInt(&staticIndexValue));
+          if (isStaticIndex) {
+            assert(staticIndexValue.getBitWidth() <= 64 &&
+                   llvm::isInt<32>(staticIndexValue.getLimitedValue()) &&
+                   "struct index can't fit within int32_t");
+            gepIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
+          }
+        }
+        if (!isStaticIndex)
+          return llvm::make_error<GEPStaticIndexError>(indexPos);
+
+        ArrayRef<Type> elementTypes = structType.getBody();
+        if (gepIndex < 0 ||
+            static_cast<size_t>(gepIndex) >= elementTypes.size())
+          return llvm::make_error<GEPIndexOutOfBoundError>(indexPos);
+
+        if (updatedStructIndices)
+          (*updatedStructIndices)[indexPos] = gepIndex;
+
+        // Instead of recusively going into every children types, we only
+        // dive into the one indexed by gepIndex.
+        return recordStructIndices(elementTypes[gepIndex], indexPos + 1,
+                                   structIndices, indices, updatedStructIndices,
+                                   remainingIndices);
      })
      .Case<VectorType, LLVMScalableVectorType, LLVMFixedVectorType,
-            LLVMArrayType>([&](auto containerType) {
-        recordStructIndices(containerType.getElementType(), currentIndex + 1,
-                            indices, structSizes, visited);
-      });
+            LLVMArrayType>([&](auto containerType) -> llvm::Error {
+        // Currently we don't refine non-struct index even if it's static.
+        if (remainingIndices)
+          remainingIndices->push_back(indices[dynamicIndexPos]);
+        return recordStructIndices(containerType.getElementType(), indexPos + 1,
+                                   structIndices, indices, updatedStructIndices,
+                                   remainingIndices);
+      })
+      .Default(
+          [](auto otherType) -> llvm::Error { return llvm::Error::success(); });
 }

-/// Populates `indices` with positions of GEP indices that correspond to
-/// LLVMStructTypes potentially nested in the given `baseGEPType`, which must
-/// be either an LLVMPointer type or a vector thereof. If `structSizes` is
-/// provided, it is populated with sizes of the indexed structs for bounds
-/// verification purposes.
-void GEPOp::findKnownStructIndices(Type sourceElementType,
-                                   SmallVectorImpl<unsigned> &indices,
-                                   SmallVectorImpl<unsigned> *structSizes) {
-  SmallPtrSet<Type, 4> visited;
-  recordStructIndices(sourceElementType, /*currentIndex=*/1, indices,
-                      structSizes, visited);
+/// Driver function around `recordStructIndices`. Note that we always check
+/// from the second GEP index since the first one is always dynamic.
+static llvm::Error
+findStructIndices(Type baseGEPType, ArrayRef<int32_t> structIndices,
+                  ValueRange indices,
+                  SmallVectorImpl<int32_t> *updatedStructIndices = nullptr,
+                  SmallVectorImpl<Value> *remainingIndices = nullptr) {
+  if (remainingIndices)
+    // The first GEP index is always dynamic.
+    remainingIndices->push_back(indices[0]);
+  return recordStructIndices(baseGEPType, /*indexPos=*/1, structIndices,
+                             indices, updatedStructIndices, remainingIndices);
 }

 void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
                  Value basePtr, ValueRange operands,
                  ArrayRef<NamedAttribute> attributes) {
  build(builder, result, resultType, basePtr, operands,
-        SmallVector<int32_t>(operands.size(), LLVM::GEPOp::kDynamicIndex),
-        attributes);
+        SmallVector<int32_t>(operands.size(), kDynamicIndex), attributes);
 }

 /// Returns the elemental type of any LLVM-compatible vector type or self.
@ -480,44 +565,10 @@ void GEPOp::build(OpBuilder &builder, OperationState &result, Type resultType,
  SmallVector<Value> remainingIndices;
  SmallVector<int32_t> updatedStructIndices(structIndices.begin(),
                                            structIndices.end());
-  SmallVector<unsigned> structRelatedPositions;
-  findKnownStructIndices(elementType, structRelatedPositions);
-
-  SmallVector<unsigned> operandsToErase;
-  for (unsigned pos : structRelatedPositions) {
-    // GEP may not be indexing as deep as some structs are located.
-    if (pos >= structIndices.size())
-      continue;
-
-    // If the index is already static, it's fine.
-    if (structIndices[pos] != kDynamicIndex)
-      continue;
-
-    // Find the corresponding operand.
-    unsigned operandPos =
-        std::count(structIndices.begin(), std::next(structIndices.begin(), pos),
-                   kDynamicIndex);
-
-    // Extract the constant value from the operand and put it into the attribute
-    // instead.
-    APInt staticIndexValue;
-    bool matched =
-        matchPattern(indices[operandPos], m_ConstantInt(&staticIndexValue));
-    (void)matched;
-    assert(matched && "index into a struct must be a constant");
-    assert(staticIndexValue.sge(APInt::getSignedMinValue(/*numBits=*/32)) &&
-           "struct index underflows 32-bit integer");
-    assert(staticIndexValue.sle(APInt::getSignedMaxValue(/*numBits=*/32)) &&
-           "struct index overflows 32-bit integer");
-    auto staticIndex = static_cast<int32_t>(staticIndexValue.getSExtValue());
-    updatedStructIndices[pos] = staticIndex;
-    operandsToErase.push_back(operandPos);
-  }
-
-  for (unsigned i = 0, e = indices.size(); i < e; ++i) {
-    if (!llvm::is_contained(operandsToErase, i))
-      remainingIndices.push_back(indices[i]);
-  }
+  if (llvm::Error err =
+          findStructIndices(elementType, structIndices, indices,
+                            &updatedStructIndices, &remainingIndices))
+    llvm::report_fatal_error(StringRef(llvm::toString(std::move(err))));

  assert(remainingIndices.size() == static_cast<size_t>(llvm::count(
                                        updatedStructIndices, kDynamicIndex)) &&
@ -582,24 +633,16 @@ LogicalResult LLVM::GEPOp::verify() {
          getElemType())))
    return failure();

-  SmallVector<unsigned> indices;
-  SmallVector<unsigned> structSizes;
-  findKnownStructIndices(getSourceElementType(), indices, &structSizes);
-  DenseIntElementsAttr structIndices = getStructIndices();
-  for (unsigned i : llvm::seq<unsigned>(0, indices.size())) {
-    unsigned index = indices[i];
-    // GEP may not be indexing as deep as some structs nested in the type.
-    if (index >= structIndices.getNumElements())
-      continue;
+  auto structIndexRange = getStructIndices().getValues<int32_t>();
+  // structIndexRange is a kind of iterator, which cannot be converted
+  // to ArrayRef directly.
+  SmallVector<int32_t> structIndices(structIndexRange.size());
+  for (unsigned i : llvm::seq<unsigned>(0, structIndexRange.size()))
+    structIndices[i] = structIndexRange[i];
+  if (llvm::Error err = findStructIndices(getSourceElementType(), structIndices,
+                                          getIndices()))
+    return emitOpError() << llvm::toString(std::move(err));

-    int32_t staticIndex = structIndices.getValues<int32_t>()[index];
-    if (staticIndex == LLVM::GEPOp::kDynamicIndex)
-      return emitOpError() << "expected index " << index
-                           << " indexing a struct to be constant";
-    if (staticIndex < 0 || static_cast<unsigned>(staticIndex) >= structSizes[i])
-      return emitOpError() << "index " << index
-                           << " indexing a struct is out of bounds";
-  }
  return success();
 }

--- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
+++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp
@ -1003,33 +1003,26 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) {
    // FIXME: Support inbounds GEPs.
    llvm::GetElementPtrInst *gep = cast<llvm::GetElementPtrInst>(inst);
    Value basePtr = processValue(gep->getOperand(0));
-    SmallVector<int32_t> staticIndices;
-    SmallVector<Value> dynamicIndices;
    Type sourceElementType = processType(gep->getSourceElementType());
-    SmallVector<unsigned> staticIndexPositions;
-    GEPOp::findKnownStructIndices(sourceElementType, staticIndexPositions);

-    for (const auto &en :
-         llvm::enumerate(llvm::drop_begin(gep->operand_values()))) {
-      llvm::Value *operand = en.value();
-      if (llvm::find(staticIndexPositions, en.index()) ==
-          staticIndexPositions.end()) {
-        staticIndices.push_back(GEPOp::kDynamicIndex);
-        dynamicIndices.push_back(processValue(operand));
-        if (!dynamicIndices.back())
-          return failure();
-      } else {
-        auto *constantInt = cast<llvm::ConstantInt>(operand);
-        staticIndices.push_back(
-            static_cast<int32_t>(constantInt->getValue().getZExtValue()));
-      }
+    SmallVector<Value> indices;
+    for (llvm::Value *operand : llvm::drop_begin(gep->operand_values())) {
+      indices.push_back(processValue(operand));
+      if (!indices.back())
+        return failure();
    }
+    // Treat every indices as dynamic since GEPOp::build will refine those
+    // indices into static attributes later. One small downside of this
+    // approach is that many unused `llvm.mlir.constant` would be emitted
+    // at first place.
+    SmallVector<int32_t> structIndices(indices.size(),
+                                       LLVM::GEPOp::kDynamicIndex);

    Type type = processType(inst->getType());
    if (!type)
      return failure();
    instMap[inst] = b.create<GEPOp>(loc, type, sourceElementType, basePtr,
-                                    dynamicIndices, staticIndices);
+                                    indices, structIndices);
    return success();
  }
  case llvm::Instruction::InsertValue: {
--- a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
+++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir
@ -0,0 +1,12 @@
+// RUN: mlir-opt %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi32>>, #dlti.dl_entry<i8, dense<8> : vector<2xi32>>, #dlti.dl_entry<i16, dense<16> : vector<2xi32>>, #dlti.dl_entry<i32, dense<32> : vector<2xi32>>, #dlti.dl_entry<i64, dense<[32, 64]> : vector<2xi32>>, #dlti.dl_entry<f16, dense<16> : vector<2xi32>>, #dlti.dl_entry<f64, dense<64> : vector<2xi32>>, #dlti.dl_entry<f128, dense<128> : vector<2xi32>>>} {
+  // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+  llvm.func @foo(%arg0: !llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, %arg1: i32) {
+    // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+    %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {structIndices = dense<[-2147483648, 1, -2147483648]> : tensor<3xi32>} : (!llvm.ptr<struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>>, i32, i32) -> !llvm.ptr<i32>
+    llvm.return
+  }
+}
--- a/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
+++ b/mlir/test/Target/LLVMIR/Import/dynamic-gep-index.ll
@ -0,0 +1,12 @@
+; RUN: mlir-translate --import-llvm %s | FileCheck %s
+
+%sub_struct = type { i32, i8 }
+%my_struct = type { %sub_struct, [4 x i32] }
+
+; CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr<struct<"my_struct", {{.+}}>>, %[[ARG1:.+]]: i32)
+define void @foo(%my_struct* %arg, i32 %idx) {
+  ; CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32)
+  ; CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]]
+  %1 = getelementptr %my_struct, %my_struct* %arg, i32 0, i32 1, i32 %idx
+  ret void
+}