[flang] Avoid opaque pointer issue with character array substring addressing

When addressing a substring of a character array, codegen emits two GEPs: one for to compute the address of the base element, and a second one to address the first characters from that element. The first GEP still returns the LLVM array type (if the FIR array type could be translated to an array type. Therefore) so zero indexes must be added to the second GEP in this case to cover for the Fortran array dimensions before inserting the susbtring offset index. Surprisingly, the previous code worked ok when MLIR emits none opaque pointers. But with opaque pointers, the two GEPs are folded in an invalid GEP where the substring offset becomes an offset for the outer array dimension. Note that I tried to fix the issue by modifying the first GEP to return the element type, but this still gave bad results (here something might be wrong with opaque pointer in MLIR or LLVM). Differential Revision: https://reviews.llvm.org/D129079
2022-07-05 09:13:07 +02:00 · 2022-07-05 09:13:07 +02:00 · b094c737cd
parent 7a550212e8
commit b094c737cd
2 changed files with 18 additions and 12 deletions
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@ -1064,6 +1064,15 @@ getFree(fir::FreeMemOp op, mlir::ConversionPatternRewriter &rewriter) {
                                        /*isVarArg=*/false));
 }

+static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
+  unsigned result = 1;
+  for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
+       eleTy;
+       eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
+    ++result;
+  return result;
+}
+
 namespace {
 /// Lower a `fir.freemem` instruction into `llvm.call @free`
 struct FreeMemOpConversion : public FIROpConversion<fir::FreeMemOp> {
@ -1383,9 +1392,15 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
    llvm::SmallVector<mlir::Value> gepOperands;
    auto baseType =
        base.getType().cast<mlir::LLVM::LLVMPointerType>().getElementType();
-    if (baseType.isa<mlir::LLVM::LLVMArrayType>()) {
+    if (auto arrayType = baseType.dyn_cast<mlir::LLVM::LLVMArrayType>()) {
+      // FIXME: The baseType should be the array element type here, meaning
+      // there should at most be one dimension (constant length characters are
+      // lowered to LLVM as an array of length one characters.). However, using
+      // the character type in the GEP does not lead to correct GEPs when llvm
+      // opaque pointers are enabled.
      auto idxTy = this->lowerTy().indexType();
-      gepOperands.push_back(genConstantIndex(loc, idxTy, rewriter, 0));
+      gepOperands.append(getDimension(arrayType),
+                         genConstantIndex(loc, idxTy, rewriter, 0));
      gepOperands.push_back(lowerBound);
    } else {
      gepOperands.push_back(lowerBound);
@ -1927,15 +1942,6 @@ struct ValueOpCommon {
  }

 private:
-  static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
-    unsigned result = 1;
-    for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
-         eleTy;
-         eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
-      ++result;
-    return result;
-  }
-
  static mlir::Type getArrayElementType(mlir::LLVM::LLVMArrayType ty) {
    auto eleTy = ty.getElementType();
    while (auto arrTy = eleTy.dyn_cast<mlir::LLVM::LLVMArrayType>())
--- a/flang/test/Fir/embox.fir
+++ b/flang/test/Fir/embox.fir
@ -75,7 +75,7 @@ func.func @emboxSubstring(%arg0: !fir.ref<!fir.array<2x3x!fir.char<1,4>>>) {
  %1 = fir.slice %c1, %c2, %c1, %c1, %c3, %c1 substr %c1_i64, %c2_i64 : (index, index, index, index, index, index, i64, i64) -> !fir.slice<2>
  %2 = fir.embox %arg0(%0) [%1] : (!fir.ref<!fir.array<2x3x!fir.char<1,4>>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x?x!fir.char<1,?>>>
  // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0
-  // CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 1
+  // CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 0, i64 0, i64 1
  // CHECK: insertvalue {[[descriptorType:.*]]} { ptr undef, i64 2, i32 20180515, i8 2, i8 40, i8 0, i8 0,
  // CHECK-SAME: [2 x [3 x i64]] [{{\[}}3 x i64] [i64 1, i64 2, i64 4], [3 x i64] [i64 1, i64 3, i64 8]] },
  // CHECK-SAME: ptr %[[substringAddr]], 0