[flang] Avoid opaque pointer issue with character array substring addressing

When addressing a substring of a character array, codegen emits two
GEPs: one for to compute the address of the base element, and a second
one to address the first characters from that element.

The first GEP still returns the LLVM array type (if the FIR array type could be
translated to an array type. Therefore) so zero
indexes must be added to the second GEP in this case to cover for the
Fortran array dimensions before inserting the susbtring offset index.

Surprisingly, the previous code worked ok when MLIR emits none opaque
pointers. But with opaque pointers, the two GEPs are folded in an
invalid GEP where the substring offset becomes an offset for the outer
array dimension.

Note that I tried to fix the issue by modifying the first GEP to return the
element type, but this still gave bad results (here something might be
wrong with opaque pointer in MLIR or LLVM).

Differential Revision: https://reviews.llvm.org/D129079
This commit is contained in:
Jean Perier 2022-07-05 09:13:07 +02:00
parent 7a550212e8
commit b094c737cd
2 changed files with 18 additions and 12 deletions

View File

@ -1064,6 +1064,15 @@ getFree(fir::FreeMemOp op, mlir::ConversionPatternRewriter &rewriter) {
/*isVarArg=*/false));
}
static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
unsigned result = 1;
for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
eleTy;
eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
++result;
return result;
}
namespace {
/// Lower a `fir.freemem` instruction into `llvm.call @free`
struct FreeMemOpConversion : public FIROpConversion<fir::FreeMemOp> {
@ -1383,9 +1392,15 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
llvm::SmallVector<mlir::Value> gepOperands;
auto baseType =
base.getType().cast<mlir::LLVM::LLVMPointerType>().getElementType();
if (baseType.isa<mlir::LLVM::LLVMArrayType>()) {
if (auto arrayType = baseType.dyn_cast<mlir::LLVM::LLVMArrayType>()) {
// FIXME: The baseType should be the array element type here, meaning
// there should at most be one dimension (constant length characters are
// lowered to LLVM as an array of length one characters.). However, using
// the character type in the GEP does not lead to correct GEPs when llvm
// opaque pointers are enabled.
auto idxTy = this->lowerTy().indexType();
gepOperands.push_back(genConstantIndex(loc, idxTy, rewriter, 0));
gepOperands.append(getDimension(arrayType),
genConstantIndex(loc, idxTy, rewriter, 0));
gepOperands.push_back(lowerBound);
} else {
gepOperands.push_back(lowerBound);
@ -1927,15 +1942,6 @@ struct ValueOpCommon {
}
private:
static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
unsigned result = 1;
for (auto eleTy = ty.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>();
eleTy;
eleTy = eleTy.getElementType().dyn_cast<mlir::LLVM::LLVMArrayType>())
++result;
return result;
}
static mlir::Type getArrayElementType(mlir::LLVM::LLVMArrayType ty) {
auto eleTy = ty.getElementType();
while (auto arrTy = eleTy.dyn_cast<mlir::LLVM::LLVMArrayType>())

View File

@ -75,7 +75,7 @@ func.func @emboxSubstring(%arg0: !fir.ref<!fir.array<2x3x!fir.char<1,4>>>) {
%1 = fir.slice %c1, %c2, %c1, %c1, %c3, %c1 substr %c1_i64, %c2_i64 : (index, index, index, index, index, index, i64, i64) -> !fir.slice<2>
%2 = fir.embox %arg0(%0) [%1] : (!fir.ref<!fir.array<2x3x!fir.char<1,4>>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box<!fir.array<?x?x!fir.char<1,?>>>
// CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0
// CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 1
// CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 0, i64 0, i64 1
// CHECK: insertvalue {[[descriptorType:.*]]} { ptr undef, i64 2, i32 20180515, i8 2, i8 40, i8 0, i8 0,
// CHECK-SAME: [2 x [3 x i64]] [{{\[}}3 x i64] [i64 1, i64 2, i64 4], [3 x i64] [i64 1, i64 3, i64 8]] },
// CHECK-SAME: ptr %[[substringAddr]], 0