From af40f99e2b4db501b955d04ae30fdf21853ea945 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier@nvidia.com>
Date: Tue, 12 Jul 2022 09:26:16 +0200
Subject: [PATCH] [flang] Merge GEPs in substring fir.embox codegen

When computing the base addresses of an array slice to make a
descriptor, codegen generated two LLVM GEPs: the first to compute the
address of the base character element, and a second one to compute the
substring base inside that element.

The previous code did not care about getting the result of the first
GEP right: it used the base array LLVM type as the result type. This
used to work when opaque pointers were not enabled (the actual GEP
result type was probably applied in some later pass). But with opaque
pointers, the second GEP ends up computing an offset scaled by the
whole character element type (`len x i8`) instead of an offset scaled
by single characters (`i8`).

A previous attempt to fix the issue was made in D129079, but it does
not cover the cases where the array slice contains subcomponents
before the substring (e.g.: array(:)%char_field(5:10)).

This patch fixes the issue by computing the actual GEP result type in
codegen. There is also enough knowledge now so that a single GEP can
be generated instead of two.

Differential Revision: https://reviews.llvm.org/D129481
---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp | 174 +++++++++++++++---------
 flang/test/Fir/convert-to-llvm.fir      |   9 +-
 flang/test/Fir/embox.fir                |   5 +-
 flang/test/Fir/rebox-susbtring.fir      |   9 +-
 4 files changed, 121 insertions(+), 76 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index f9f6f939705c..4a4ff04550fc 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -64,6 +64,20 @@ static mlir::Block *createBlock(mlir::ConversionPatternRewriter &rewriter,
                                 mlir::Region::iterator(insertBefore));
 }
 
+/// Extract constant from a value that must be the result of one of the
+/// ConstantOp operations.
+static int64_t getConstantIntValue(mlir::Value val) { + assert(val && val.dyn_cast() && "must not be null value"); + mlir::Operation *defop = val.getDefiningOp(); + + if (auto constOp = mlir::dyn_cast(defop)) + return constOp.value(); + if (auto llConstOp = mlir::dyn_cast(defop)) + if (auto attr = llConstOp.getValue().dyn_cast()) + return attr.getValue().getSExtValue(); + fir::emitFatalError(val.getLoc(), "must be a constant"); +} + namespace { /// FIR conversion pattern template template @@ -1384,28 +1398,83 @@ struct EmboxCommonConversion : public FIROpConversion { return {boxTy, descriptor, eleSize}; } - /// Compute the base address of a substring given the base address of a scalar - /// string and the zero based string lower bound. - mlir::Value shiftSubstringBase(mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, mlir::Value base, - mlir::Value lowerBound) const { - llvm::SmallVector gepOperands; - auto baseType = + // Compute the base address of a fir.box given the indices from the slice. + // The indices from the "outer" dimensions (every dimension after the first + // one (inlcuded) that is not a compile time constant) must have been + // multiplied with the related extents and added together into \p outerOffset. + mlir::Value + genBoxOffsetGep(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, + mlir::Value base, mlir::Value outerOffset, + mlir::ValueRange cstInteriorIndices, + mlir::ValueRange componentIndices, + llvm::Optional substringOffset) const { + llvm::SmallVector gepArgs{outerOffset}; + mlir::Type resultTy = base.getType().cast().getElementType(); - if (auto arrayType = baseType.dyn_cast()) { - // FIXME: The baseType should be the array element type here, meaning - // there should at most be one dimension (constant length characters are - // lowered to LLVM as an array of length one characters.). However, using - // the character type in the GEP does not lead to correct GEPs when llvm - // opaque pointers are enabled. 
- auto idxTy = this->lowerTy().indexType(); - gepOperands.append(getDimension(arrayType), - genConstantIndex(loc, idxTy, rewriter, 0)); - gepOperands.push_back(lowerBound); - } else { - gepOperands.push_back(lowerBound); + // Fortran is column major, llvm GEP is row major: reverse the indices here. + for (mlir::Value interiorIndex : llvm::reverse(cstInteriorIndices)) { + auto arrayTy = resultTy.dyn_cast(); + if (!arrayTy) + fir::emitFatalError( + loc, + "corrupted GEP generated being generated in fir.embox/fir.rebox"); + resultTy = arrayTy.getElementType(); + gepArgs.push_back(interiorIndex); } - return this->genGEP(loc, base.getType(), rewriter, base, gepOperands); + for (mlir::Value componentIndex : componentIndices) { + // Component indices can be field index to select a component, or array + // index, to select an element in an array component. + if (auto structTy = resultTy.dyn_cast()) { + std::int64_t cstIndex = getConstantIntValue(componentIndex); + resultTy = structTy.getBody()[cstIndex]; + } else if (auto arrayTy = + resultTy.dyn_cast()) { + resultTy = arrayTy.getElementType(); + } else { + fir::emitFatalError(loc, "corrupted component GEP generated being " + "generated in fir.embox/fir.rebox"); + } + gepArgs.push_back(componentIndex); + } + if (substringOffset) { + if (auto arrayTy = resultTy.dyn_cast()) { + gepArgs.push_back(*substringOffset); + resultTy = arrayTy.getElementType(); + } else { + // If the CHARACTER length is dynamic, the whole base type should have + // degenerated to an llvm.ptr, and there should not be any + // cstInteriorIndices/componentIndices. The substring offset can be + // added to the outterOffset since it applies on the same LLVM type. 
+ if (gepArgs.size() != 1) + fir::emitFatalError(loc, + "corrupted substring GEP in fir.embox/fir.rebox"); + mlir::Type outterOffsetTy = gepArgs[0].getType(); + mlir::Value cast = + this->integerCast(loc, rewriter, outterOffsetTy, *substringOffset); + + gepArgs[0] = rewriter.create(loc, outterOffsetTy, + gepArgs[0], cast); + } + } + resultTy = mlir::LLVM::LLVMPointerType::get(resultTy); + return rewriter.create(loc, resultTy, base, gepArgs); + } + + template + void + getSubcomponentIndices(BOX xbox, mlir::Value memref, + mlir::ValueRange operands, + mlir::SmallVectorImpl &indices) const { + // For each field in the path add the offset to base via the args list. + // In the most general case, some offsets must be computed since + // they are not be known until runtime. + if (fir::hasDynamicSize(fir::unwrapSequenceType( + fir::unwrapPassByRefType(memref.getType())))) + TODO(xbox.getLoc(), + "fir.embox codegen dynamic size component in derived type"); + indices.append(operands.begin() + xbox.subcomponentOffset(), + operands.begin() + xbox.subcomponentOffset() + + xbox.subcomponent().size()); } /// If the embox is not in a globalOp body, allocate storage for the box; @@ -1489,7 +1558,7 @@ struct XEmboxOpConversion : public EmboxCommonConversion { mlir::Value prevPtrOff = one; mlir::Type eleTy = boxTy.getEleTy(); const unsigned rank = xbox.getRank(); - llvm::SmallVector gepArgs; + llvm::SmallVector cstInteriorIndices; unsigned constRows = 0; mlir::Value ptrOffset = zero; mlir::Type memEleTy = fir::dyn_cast_ptrEleTy(xbox.memref().getType()); @@ -1554,7 +1623,7 @@ struct XEmboxOpConversion : public EmboxCommonConversion { adj = operands[shiftOffset]; auto ao = rewriter.create(loc, i64Ty, off, adj); if (constRows > 0) { - gepArgs.push_back(ao); + cstInteriorIndices.push_back(ao); } else { auto dimOff = rewriter.create(loc, i64Ty, ao, prevPtrOff); @@ -1624,24 +1693,15 @@ struct XEmboxOpConversion : public EmboxCommonConversion { sliceOffset += 3; } if (hasSlice || 
hasSubcomp || hasSubstr) { - llvm::SmallVector args = {ptrOffset}; - args.append(gepArgs.rbegin(), gepArgs.rend()); - if (hasSubcomp) { - // For each field in the path add the offset to base via the args list. - // In the most general case, some offsets must be computed since - // they are not be known until runtime. - if (fir::hasDynamicSize(fir::unwrapSequenceType( - fir::unwrapPassByRefType(xbox.memref().getType())))) - TODO(loc, "fir.embox codegen dynamic size component in derived type"); - args.append(operands.begin() + xbox.subcomponentOffset(), - operands.begin() + xbox.subcomponentOffset() + - xbox.subcomponent().size()); - } - base = - rewriter.create(loc, base.getType(), base, args); + // Shift the base address. + llvm::SmallVector fieldIndices; + llvm::Optional substringOffset; + if (hasSubcomp) + getSubcomponentIndices(xbox, xbox.memref(), operands, fieldIndices); if (hasSubstr) - base = shiftSubstringBase(rewriter, loc, base, - operands[xbox.substrOffset()]); + substringOffset = operands[xbox.substrOffset()]; + base = genBoxOffsetGep(rewriter, loc, base, ptrOffset, cstInteriorIndices, + fieldIndices, substringOffset); } dest = insertBaseAddress(rewriter, loc, dest, base); if (isDerivedTypeWithLenParams(boxTy)) @@ -1765,15 +1825,15 @@ private: mlir::LLVM::LLVMPointerType::get(convertType(inputEleTy)); base = rewriter.create(loc, llvmElePtrTy, base); - if (!rebox.subcomponent().empty()) { - llvm::SmallVector gepOperands = {zero}; - for (unsigned i = 0; i < rebox.subcomponent().size(); ++i) - gepOperands.push_back(operands[rebox.subcomponentOffset() + i]); - base = genGEP(loc, llvmElePtrTy, rewriter, base, gepOperands); - } + llvm::SmallVector fieldIndices; + llvm::Optional substringOffset; + if (!rebox.subcomponent().empty()) + getSubcomponentIndices(rebox, rebox.box(), operands, fieldIndices); if (!rebox.substr().empty()) - base = shiftSubstringBase(rewriter, loc, base, - operands[rebox.substrOffset()]); + substringOffset = 
operands[rebox.substrOffset()]; + base = genBoxOffsetGep(rewriter, loc, base, zero, + /*cstInteriorIndices=*/llvm::None, fieldIndices, + substringOffset); } if (rebox.slice().empty()) @@ -2266,19 +2326,7 @@ struct CoordinateOpConversion ? op.getDefiningOp() ->getAttrOfType("field") .getInt() - : getIntValue(op); - } - - static int64_t getIntValue(mlir::Value val) { - assert(val && val.dyn_cast() && "must not be null value"); - mlir::Operation *defop = val.getDefiningOp(); - - if (auto constOp = mlir::dyn_cast(defop)) - return constOp.value(); - if (auto llConstOp = mlir::dyn_cast(defop)) - if (auto attr = llConstOp.getValue().dyn_cast()) - return attr.getValue().getSExtValue(); - fir::emitFatalError(val.getLoc(), "must be a constant"); + : getConstantIntValue(op); } static bool hasSubDimensions(mlir::Type type) { @@ -2306,7 +2354,7 @@ struct CoordinateOpConversion type = recTy.getType(getFieldNumber(recTy, nxtOpnd)); } else if (auto tupTy = type.dyn_cast()) { subEle = true; - type = tupTy.getType(getIntValue(nxtOpnd)); + type = tupTy.getType(getConstantIntValue(nxtOpnd)); } else { ptrEle = true; } @@ -2330,7 +2378,7 @@ struct CoordinateOpConversion } else if (auto strTy = type.dyn_cast()) { type = strTy.getType(getFieldNumber(strTy, nxtOpnd)); } else if (auto strTy = type.dyn_cast()) { - type = strTy.getType(getIntValue(nxtOpnd)); + type = strTy.getType(getConstantIntValue(nxtOpnd)); } else { return true; } @@ -2520,7 +2568,7 @@ private: if (auto recTy = cpnTy.dyn_cast()) cpnTy = recTy.getType(getFieldNumber(recTy, nxtOpnd)); else if (auto tupTy = cpnTy.dyn_cast()) - cpnTy = tupTy.getType(getIntValue(nxtOpnd)); + cpnTy = tupTy.getType(getConstantIntValue(nxtOpnd)); else cpnTy = nullptr; diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index ae46b3653115..7b5d1e9d326b 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -1947,8 +1947,8 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box>) 
// CHECK: %[[BOX8:.*]] = llvm.insertvalue %[[EXT_SELECT]], %[[BOX7]][7 : i32, 0 : i32, 1 : i32] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[STRIDE_MUL:.*]] = llvm.mul %[[PTRTOINT_DTYPE_SIZE]], %[[C2]] : i64 // CHECK: %[[BOX9:.*]] = llvm.insertvalue %[[STRIDE_MUL]], %[[BOX8]][7 : i32, 0 : i32, 2 : i32] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> -// CHECK: %[[BASE_PTR:.*]] = llvm.getelementptr %[[X]][%[[ZERO]], %[[ADJUSTED_OFFSET]], 0] : (!llvm.ptr>>, i64, i64) -> !llvm.ptr>> -// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr>> to !llvm.ptr +// CHECK: %[[BASE_PTR:.*]] = llvm.getelementptr %[[X]][%[[ZERO]], %[[ADJUSTED_OFFSET]], 0] : (!llvm.ptr>>, i64, i64) -> !llvm.ptr +// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[BOX10:.*]] = llvm.insertvalue %[[ADDR_BITCAST]], %[[BOX9]][0 : i32] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: llvm.store %[[BOX10]], %[[ALLOCA]] : !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> // CHECK: llvm.call @_QPtest_dt_callee(%1) : (!llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>) -> () @@ -2294,9 +2294,8 @@ func.func @foo(%arg0: !fir.box} //CHECK: %[[SRC_ARRAY:.*]] = llvm.load %[[SRC_ARRAY_PTR]] : !llvm.ptr)>>> //CHECK: %[[ZERO_6:.*]] = llvm.mlir.constant(0 : i64) : i64 //CHECK: %[[SRC_CAST:.*]] = llvm.bitcast %[[SRC_ARRAY]] : !llvm.ptr)>> to !llvm.ptr)>> -//CHECK: %[[TMP_COMPONENT:.*]] = llvm.getelementptr %[[SRC_CAST]][%[[ZERO_6]], 1] : (!llvm.ptr)>>, i64) -> !llvm.ptr)>> -//CHECK: %[[COMPONENT:.*]] = llvm.getelementptr %[[TMP_COMPONENT]][%[[COMPONENT_OFFSET_1]]] : (!llvm.ptr)>>, i64) -> !llvm.ptr)>> -//CHECK: %[[COMPONENT_CAST:.*]] = llvm.bitcast %[[COMPONENT]] : !llvm.ptr)>> to 
!llvm.ptr +//CHECK: %[[COMPONENT:.*]] = llvm.getelementptr %[[SRC_CAST]][%[[ZERO_6]], 1, %[[COMPONENT_OFFSET_1]]] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr +//CHECK: %[[COMPONENT_CAST:.*]] = llvm.bitcast %[[COMPONENT]] : !llvm.ptr to !llvm.ptr //CHECK: %[[SRC_LB:.*]] = llvm.mlir.constant(1 : i64) : i64 //CHECK: %[[RESULT_TMP0:.*]] = llvm.sub %[[RESULT_LB]], %[[SRC_LB]] : i64 //CHECK: %[[RESULT_OFFSET_START:.*]] = llvm.mul %[[RESULT_TMP0]], %[[SRC_STRIDE]] : i64 diff --git a/flang/test/Fir/embox.fir b/flang/test/Fir/embox.fir index 0019217bb68d..79aded8b5f71 100644 --- a/flang/test/Fir/embox.fir +++ b/flang/test/Fir/embox.fir @@ -74,11 +74,10 @@ func.func @emboxSubstring(%arg0: !fir.ref>>) { %0 = fir.shape %c2, %c3 : (index, index) -> !fir.shape<2> %1 = fir.slice %c1, %c2, %c1, %c1, %c3, %c1 substr %c1_i64, %c2_i64 : (index, index, index, index, index, index, i64, i64) -> !fir.slice<2> %2 = fir.embox %arg0(%0) [%1] : (!fir.ref>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box>> - // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0 - // CHECK: %[[substringAddr:.*]] = getelementptr {{.*}}, ptr %[[addr]], i64 0, i64 0, i64 0, i64 1 + // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0, i64 1 // CHECK: insertvalue {[[descriptorType:.*]]} { ptr undef, i64 2, i32 20180515, i8 2, i8 40, i8 0, i8 0, // CHECK-SAME: [2 x [3 x i64]] [{{\[}}3 x i64] [i64 1, i64 2, i64 4], [3 x i64] [i64 1, i64 3, i64 8]] }, - // CHECK-SAME: ptr %[[substringAddr]], 0 + // CHECK-SAME: ptr %[[addr]], 0 fir.call @takesRank2CharBox(%2) : (!fir.box>>) -> () return diff --git a/flang/test/Fir/rebox-susbtring.fir b/flang/test/Fir/rebox-susbtring.fir index 382c54e1058e..05c04877b73e 100644 --- a/flang/test/Fir/rebox-susbtring.fir +++ b/flang/test/Fir/rebox-susbtring.fir @@ -25,8 +25,8 @@ func.func @char_section(%arg0: !fir.box>>) { // CHECK: %[[VAL_37:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}%[[VAL_7]], 0] : 
(!llvm.ptr<[[char20_descriptor_t]]>)>>, i32) -> !llvm.ptr>> // CHECK: %[[VAL_38:.*]] = llvm.load %[[VAL_37]] : !llvm.ptr>> // CHECK: %[[VAL_39:.*]] = llvm.bitcast %[[VAL_38]] : !llvm.ptr> to !llvm.ptr> -// CHECK: %[[VAL_40:.*]] = llvm.getelementptr %[[VAL_39]]{{\[}}%[[VAL_30]], %[[VAL_4]]] : (!llvm.ptr>, i64, i64) -> !llvm.ptr> -// CHECK: llvm.bitcast %[[VAL_40]] : !llvm.ptr> to !llvm.ptr +// CHECK: %[[VAL_40:.*]] = llvm.getelementptr %[[VAL_39]]{{\[}}%[[VAL_30]], %[[VAL_4]]] : (!llvm.ptr>, i64, i64) -> !llvm.ptr +// CHECK: llvm.bitcast %[[VAL_40]] : !llvm.ptr to !llvm.ptr // More offset computation with descriptor strides and triplets that is not character specific ... @@ -59,9 +59,8 @@ func.func @foo(%arg0: !fir.box} // CHECK: %[[VAL_30:.*]] = llvm.getelementptr %[[VAL_0]]{{\[}}%[[VAL_17]], 0] : (!llvm.ptr<[[struct_t_descriptor:.*]]>, i32) -> !llvm.ptr> // CHECK: %[[VAL_31:.*]] = llvm.load %[[VAL_30]] : !llvm.ptr> // CHECK: %[[VAL_32:.*]] = llvm.bitcast %[[VAL_31]] : !llvm.ptr<[[struct_t]]> to !llvm.ptr<[[struct_t]]> -// CHECK: %[[VAL_33:.*]] = llvm.getelementptr %[[VAL_32]]{{\[}}%[[VAL_21]], 1] : (!llvm.ptr<[[struct_t]]>, i64) -> !llvm.ptr<[[struct_t]]> -// CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]]{{\[}}%[[VAL_4]]] : (!llvm.ptr<[[struct_t]]>, i64) -> !llvm.ptr<[[struct_t]]> -// CHECK: llvm.bitcast %[[VAL_34]] : !llvm.ptr<[[struct_t]]> to !llvm.ptr +// CHECK: %[[VAL_33:.*]] = llvm.getelementptr %[[VAL_32]]{{\[}}%[[VAL_21]], 1, %[[VAL_4]]] : (!llvm.ptr<[[struct_t]]>, i64, i64) -> !llvm.ptr +// CHECK: llvm.bitcast %[[VAL_33]] : !llvm.ptr to !llvm.ptr // More offset computation with descriptor strides and triplets that is not character specific ...