[Fortran Support] Materialize outermost dimension for Fortran array.

- We use the outermost dimension of arrays since we need this
information to generate GPU transfers.

- In general, if we do not know the outermost dimension of the array
(because the indexing expression is non-affine, for example) then we
simply cannot generate transfer code.

- However, for Fortran arrays, we can use the Fortran array
representation which stores the dimensions of all arrays.

- This patch uses the Fortran array representation to generate code that
computes the outermost dimension size.

Differential Revision: https://reviews.llvm.org/D32967

llvm-svn: 303429
This commit is contained in:
Siddharth Bhat 2017-05-19 15:07:45 +00:00
parent ce941c9c38
commit b7f68b8c9e
7 changed files with 310 additions and 6 deletions

View File

@ -75,6 +75,13 @@ public:
void addParameters(__isl_take isl_set *Context);
/// Create Values which hold the sizes of the outermost dimension of all
/// Fortran arrays in the current scop.
///
/// @returns False, if a problem occurred and a Fortran array was not
/// materialized. True otherwise.
bool materializeFortranArrayOutermostDimension();
/// Generate code that evaluates @p Condition at run-time.
///
/// This function is typically called to generate the LLVM-IR for the

View File

@ -264,6 +264,12 @@ public:
/// with old sizes
bool updateSizes(ArrayRef<const SCEV *> Sizes, bool CheckConsistency = true);
/// Make the ScopArrayInfo model a Fortran array.
/// It receives the Fortran array descriptor and stores this.
/// It also adds a piecewise expression for the outermost dimension
/// since this information is available for Fortran arrays at runtime.
void applyAndSetFAD(Value *FAD);
/// Destructor to free the isl id of the base pointer.
~ScopArrayInfo();
@ -420,6 +426,10 @@ private:
/// The scop this SAI object belongs to.
Scop &S;
/// If this array models a Fortran array, then this points
/// to the Fortran array descriptor.
Value *FAD;
};
/// Represent memory accesses in statements.
@ -891,6 +901,10 @@ public:
/// the dimension of the innermost loop containing the statement.
__isl_give isl_set *getStride(__isl_take const isl_map *Schedule) const;
/// Get the FortranArrayDescriptor corresponding to this memory access if
/// it exists, and nullptr otherwise.
///
/// @returns The value stored in the FAD member of this access.
Value *getFortranArrayDescriptor() const { return FAD; }
/// Is the stride of the access equal to a certain width? Schedule is a map
/// from the statement to a schedule where the innermost dimension is the
/// dimension of the innermost loop containing the statement.
@ -2063,6 +2077,9 @@ private:
/// all memory accesses have been modeled and canonicalized.
void assumeNoOutOfBounds();
/// Mark arrays that have memory accesses with FortranArrayDescriptor.
void markFortranArrays();
/// Finalize all access relations.
///
/// When building up access relations, temporary access relations that

View File

@ -247,7 +247,8 @@ ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
const DataLayout &DL, Scop *S,
const char *BaseName)
: BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
: BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S),
FAD(nullptr) {
std::string BasePtrName =
BaseName ? BaseName
: getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
@ -318,6 +319,37 @@ void ScopArrayInfo::updateElementType(Type *NewElementType) {
}
}
/// Turn this ScopArrayInfo into the model of a Fortran array.
///
/// Stores @p FAD as the Fortran array descriptor of this array and installs,
/// as the size of the outermost dimension, an isl_pw_aff that refers to a
/// single parameter named "<array name>_fortranarr_size".
///
/// Calling this again with the descriptor that is already stored does
/// nothing; a different descriptor triggers an assertion.
///
/// @param FAD The Fortran array descriptor; must not be null.
void ScopArrayInfo::applyAndSetFAD(Value *FAD) {
  assert(FAD && "got invalid Fortran array descriptor");

  // Already marked as a Fortran array: only verify the descriptor matches.
  if (this->FAD) {
    assert(this->FAD == FAD &&
           "receiving different array descriptors for same array");
    return;
  }

  // The outermost dimension slot must exist and must not have a size yet.
  assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]);
  assert(!this->FAD);
  this->FAD = FAD;

  // A set space with exactly one parameter and no set dimensions.
  isl_space *ParamSpace = isl_space_set_alloc(S.getIslCtx(), 1, 0);

  std::string SizeParamName(getName());
  SizeParamName.append("_fortranarr_size");

  // TODO: see if we need to add `this` as the id user pointer
  isl_id *SizeId = isl_id_alloc(S.getIslCtx(), SizeParamName.c_str(), nullptr);
  ParamSpace = isl_space_set_dim_id(ParamSpace, isl_dim_param, 0, SizeId);

  // Derive a local space from the universe set over the parameter space.
  isl_basic_set *Universe = isl_basic_set_universe(ParamSpace);
  isl_local_space *ParamLocalSpace = isl_basic_set_get_local_space(Universe);
  isl_basic_set_free(Universe);

  // The size expression is simply the value of the freshly named parameter.
  isl_aff *SizeAff = isl_aff_var_on_domain(ParamLocalSpace, isl_dim_param, 0);
  DimensionSizesPw[0] = isl_pw_aff_from_aff(SizeAff);
}
bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
bool CheckConsistency) {
int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
@ -374,7 +406,12 @@ void ScopArrayInfo::dump() const { print(errs()); }
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
OS.indent(8) << *getElementType() << " " << getName();
unsigned u = 0;
if (getNumberOfDimensions() > 0 && !getDimensionSize(0)) {
// If this is a Fortran array, then we can print the outermost dimension
// as an isl_pw_aff even though there is no SCEV information.
bool IsOutermostSizeKnown = SizeAsPwAff && FAD;
if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
!getDimensionSize(0)) {
OS << "[*]";
u++;
}
@ -2175,6 +2212,46 @@ void Scop::addParameterBounds() {
}
}
// We use the outermost dimension to generate GPU transfers for Fortran arrays
// even when the array bounds are not known statically. To do so, we need the
// outermost dimension information. We add this into the context so that the
// outermost dimension is available during codegen.
// We currently do not care about dimensions other than the outermost
// dimension since it doesn't affect transfers.
static isl_set *addFortranArrayOutermostDimParams(__isl_give isl_set *Context,
Scop::array_range Arrays) {
std::vector<isl_id *> OutermostSizeIds;
for (auto Array : Arrays) {
// To check if an array is a Fortran array, we check if it has a isl_pw_aff
// for its outermost dimension. Fortran arrays will have this since the
// outermost dimension size can be picked up from their runtime description.
// TODO: actually need to check if it has a FAD, but for now this works.
if (Array->getNumberOfDimensions() > 0) {
isl_pw_aff *PwAff = Array->getDimensionSizePw(0);
if (!PwAff)
continue;
isl_id *Id = isl_pw_aff_get_dim_id(PwAff, isl_dim_param, 0);
isl_pw_aff_free(PwAff);
assert(Id && "Invalid Id for PwAff expression in Fortran array");
OutermostSizeIds.push_back(Id);
}
}
const int NumTrueParams = isl_set_dim(Context, isl_dim_param);
Context = isl_set_add_dims(Context, isl_dim_param, OutermostSizeIds.size());
for (size_t i = 0; i < OutermostSizeIds.size(); i++) {
Context = isl_set_set_dim_id(Context, isl_dim_param, NumTrueParams + i,
OutermostSizeIds[i]);
Context =
isl_set_lower_bound_si(Context, isl_dim_param, NumTrueParams + i, 0);
}
return Context;
}
void Scop::realignParams() {
if (PollyIgnoreParamBounds)
return;
@ -2191,12 +2268,15 @@ void Scop::realignParams() {
// Align the parameters of all data structures to the model.
Context = isl_set_align_params(Context, Space);
// Add the outermost dimension of the Fortran arrays into the Context.
// See the description of the function for more information.
Context = addFortranArrayOutermostDimParams(Context, arrays());
// As all parameters are known add bounds to them.
addParameterBounds();
for (ScopStmt &Stmt : *this)
Stmt.realignParams();
// Simplify the schedule according to the context too.
Schedule = isl_schedule_gist_domain_params(Schedule, getContext());
}
@ -3442,11 +3522,29 @@ void Scop::foldSizeConstantsToRight() {
return;
}
/// Walk over every memory access of every statement and, for each access
/// that carries a Fortran array descriptor, hand that descriptor to the
/// ScopArrayInfo the access (latest) refers to.
void Scop::markFortranArrays() {
  for (ScopStmt &Stmt : Stmts) {
    for (MemoryAccess *Access : Stmt) {
      // Accesses without a descriptor do not touch a Fortran array.
      if (Value *Descriptor = Access->getFortranArrayDescriptor()) {
        // TODO: const_cast-ing to edit
        auto *Array =
            const_cast<ScopArrayInfo *>(Access->getLatestScopArrayInfo());
        assert(Array && "memory access into a Fortran array does not "
                        "have an associated ScopArrayInfo");
        Array->applyAndSetFAD(Descriptor);
      }
    }
  }
}
/// Finalize the memory accesses of this scop by running the access
/// post-processing steps below, one after the other, in this fixed order.
void Scop::finalizeAccesses() {
updateAccessDimensionality();
foldSizeConstantsToRight();
foldAccessRelations();
assumeNoOutOfBounds();
// Last step: mark the arrays whose memory accesses carry a Fortran array
// descriptor.
markFortranArrays();
}
Scop::~Scop() {

View File

@ -995,6 +995,92 @@ bool IslNodeBuilder::materializeParameters() {
return true;
}
/// Generate the computation of the size of the outermost dimension from the
/// Fortran array descriptor (in this case, `@g_arr`). The final `%size`
/// contains the size of the array.
///
/// %arrty = type { i8*, i64, i64, [3 x %desc.dimensionty] }
/// %desc.dimensionty = type { i64, i64, i64 }
/// @g_arr = global %arrty zeroinitializer, align 32
/// ...
/// %0 = load i64, i64* getelementptr inbounds
/// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 2)
/// %1 = load i64, i64* getelementptr inbounds
/// (%arrty, %arrty* @g_arr, i64 0, i32 3, i64 0, i32 1)
/// %2 = sub nsw i64 %0, %1
/// %size = add nsw i64 %2, 1
///
/// That is, the size is `end - begin + 1`, where `end` and `begin` are loaded
/// from fields 2 and 1 of the first dimension entry of the descriptor.
///
/// @param GlobalDescriptor The Fortran array descriptor to load from; must
///                         not be null.
/// @param Builder          The builder used to emit the instructions.
/// @param ArrayName        Prefix used for the names of all emitted values.
///
/// @returns The value holding the size of the outermost dimension.
static Value *buildFADOutermostDimensionLoad(Value *GlobalDescriptor,
                                             PollyIRBuilder &Builder,
                                             std::string ArrayName) {
  assert(GlobalDescriptor && "invalid global descriptor given");

  // Descriptor field 3 is the dimension array; entry 0, field 2 holds `end`.
  Value *endIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
                      Builder.getInt64(0), Builder.getInt32(2)};
  Value *endPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, endIdx,
                                            ArrayName + "_end_ptr");
  Value *end = Builder.CreateLoad(endPtr, ArrayName + "_end");

  // Same dimension entry, field 1 holds `begin`.
  Value *beginIdx[4] = {Builder.getInt64(0), Builder.getInt32(3),
                        Builder.getInt64(0), Builder.getInt32(1)};
  Value *beginPtr = Builder.CreateInBoundsGEP(GlobalDescriptor, beginIdx,
                                              ArrayName + "_begin_ptr");
  Value *begin = Builder.CreateLoad(beginPtr, ArrayName + "_begin");

  Value *size =
      Builder.CreateNSWSub(end, begin, ArrayName + "_end_begin_delta");

  Type *endType = dyn_cast<IntegerType>(end->getType());
  assert(endType && "expected type of end to be integral");

  // Add one to the delta (not to `end`): both bounds are inclusive, so the
  // number of elements is `end - begin + 1`.
  size = Builder.CreateNSWAdd(size,
                              ConstantInt::get(endType, 1, /* signed = */ true),
                              ArrayName + "_size");

  return size;
}
/// Emit, for every array access in the scop that carries a Fortran array
/// descriptor, the code that computes the size of the outermost dimension,
/// and register the result in IDToValue under the parameter id of the
/// array's outermost dimension size expression.
///
/// An id that already has a value in IDToValue is not materialized again.
///
/// @returns Always true in the current implementation.
bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
  for (const ScopStmt &Stmt : S) {
    for (const MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;

      const ScopArrayInfo *SAI = Access->getScopArrayInfo();
      if (!SAI || SAI->getNumberOfDimensions() == 0)
        continue;

      Value *Descriptor = Access->getFortranArrayDescriptor();
      if (!Descriptor)
        continue;

      // Fetch the parameter id that names the outermost dimension size.
      isl_pw_aff *SizePwAff = SAI->getDimensionSizePw(0);
      assert(SizePwAff && "parameteric pw_aff corresponding "
                          "to outermost dimension does not "
                          "exist");
      isl_id *SizeId = isl_pw_aff_get_dim_id(SizePwAff, isl_dim_param, 0);
      isl_pw_aff_free(SizePwAff);
      assert(SizeId && "pw_aff is not parametric");

      // Only build the load sequence the first time this id is seen.
      if (!IDToValue.count(SizeId)) {
        Value *SizeValue = buildFADOutermostDimensionLoad(Descriptor, Builder,
                                                          SAI->getName());
        assert(SizeValue && "unable to build Fortran array "
                            "descriptor load of outermost dimension");
        IDToValue[SizeId] = SizeValue;
      }

      isl_id_free(SizeId);
    }
  }
  return true;
}
/// Add the number of dimensions in @p BS to @p U.
static isl_stat countTotalDims(__isl_take isl_basic_set *BS, void *U) {
unsigned *NumTotalDim = static_cast<unsigned *>(U);
@ -1313,6 +1399,12 @@ void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
// Materialize values for the parameters of the SCoP.
materializeParameters();
// materialize the outermost dimension parameters for a Fortran array.
// NOTE: materializeParameters() does not work since it looks through
// the SCEVs. We don't have a corresponding SCEV for the array size
// parameter
materializeFortranArrayOutermostDimension();
// Generate values for the current loop iteration for all surrounding loops.
//
// We may also reference loops outside of the scop which do not contain the

View File

@ -2163,9 +2163,17 @@ public:
for (unsigned i = 1; i < NumDims; ++i)
Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);
for (unsigned i = 1; i < NumDims; ++i) {
for (unsigned i = 0; i < NumDims; ++i) {
isl_pw_aff *PwAff =
const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));
// isl_pw_aff can be NULL for zero dimension. Only in the case of a
// Fortran array will we have a legitimate dimension.
if (!PwAff) {
assert(i == 0 && "invalid dimension isl_pw_aff for nonzero dimension");
continue;
}
isl_pw_aff *Val = isl_pw_aff_from_aff(isl_aff_var_on_domain(
isl_local_space_from_space(Array->getSpace()), isl_dim_set, i));
PwAff = isl_pw_aff_add_dims(PwAff, isl_dim_in,

View File

@ -0,0 +1,82 @@
; Check that the runtime size computation is generated for Fortran arrays.
; PPCG code generation backend:
; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
; RUN: -polly-target=gpu -polly-acc-mincompute=0 \
; RUN: -polly-codegen-ppcg < %s | FileCheck %s
; Regular code generation backend:
; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
; RUN: -polly-codegen < %s | FileCheck %s
; What the input fortran code should look like. NOTE: this is fake, the
; .ll file was hand-written.
;
; MODULE testmod
; USE data_parameters, ONLY : &
; IMPLICIT NONE
;
; INTEGER (KIND=iintegers), ALLOCATABLE, PRIVATE :: &
; arrin(:), arrout(:)
; CONTAINS
;
; SUBROUTINE test()
; INTEGER (KIND=iintegers) :: i
;
; DO i = 1, 100
; arrout(i) = arrin(i) * arrin(i)
; END DO
; END SUBROUTINE test
; END MODULE testmod
target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i32:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"
%"struct.array1_real(kind=8)" = type { i8*, i32, i32, [1 x %struct.descriptor_dimension] }
%struct.descriptor_dimension = type { i32, i32, i32 }
@arrin = unnamed_addr global %"struct.array1_real(kind=8)" zeroinitializer, align 32
@arrout = unnamed_addr global %"struct.array1_real(kind=8)" zeroinitializer, align 32
; Function Attrs: nounwind uwtable
; Hand-written kernel under test: loads an element through the data pointer
; taken from @arrin, squares it, and stores the result through the data
; pointer taken from @arrout.
define void @__src_soil_MOD_terra1() unnamed_addr #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
; The descriptor globals are bitcast to i32** so that the load yields the raw
; data pointer stored at the start of each descriptor.
%rawmemin1 = load i32*, i32** bitcast (%"struct.array1_real(kind=8)"* @arrin to i32**), align 32, !tbaa !0
%rawmemout2 = load i32*, i32** bitcast (%"struct.array1_real(kind=8)"* @arrout to i32**), align 32, !tbaa !0
br label %for.body
for.body: ; preds = %entry.split, %for.body
; The induction variable starts at 1; the loop exits once the incremented
; value equals 100.
%indvars.iv = phi i64 [ 1, %entry.split ], [ %indvars.iv.next4, %for.body ]
%inslot = getelementptr inbounds i32, i32* %rawmemin1, i64 %indvars.iv
%inval = load i32, i32* %inslot, align 8
%outslot = getelementptr inbounds i32, i32* %rawmemout2, i64 %indvars.iv
%out = mul nsw i32 %inval, %inval
store i32 %out, i32* %outslot, align 8
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next4, 100
br i1 %exitcond, label %return, label %for.body
return: ; preds = %for.body
ret void
}
attributes #0 = { nounwind uwtable }
!0 = !{!1, !1, i32 0}
!1 = !{!"alias set 3: void*", !2}
!2 = distinct !{!2}
; CHECK: %MemRef_rawmemin1_end = load i32, i32* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @arrin, i64 0, i32 3, i64 0, i32 2)
; CHECK-NEXT: %MemRef_rawmemin1_begin = load i32, i32* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @arrin, i64 0, i32 3, i64 0, i32 1)
; CHECK-NEXT: %MemRef_rawmemin1_end_begin_delta = sub nsw i32 %MemRef_rawmemin1_end, %MemRef_rawmemin1_begin
; CHECK-NEXT: %MemRef_rawmemin1_size = add nsw i32 %MemRef_rawmemin1_end, 1
; CHECK-NEXT: %MemRef_rawmemout2_end = load i32, i32* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @arrout, i64 0, i32 3, i64 0, i32 2)
; CHECK-NEXT: %MemRef_rawmemout2_begin = load i32, i32* getelementptr inbounds (%"struct.array1_real(kind=8)", %"struct.array1_real(kind=8)"* @arrout, i64 0, i32 3, i64 0, i32 1)
; CHECK-NEXT: %MemRef_rawmemout2_end_begin_delta = sub nsw i32 %MemRef_rawmemout2_end, %MemRef_rawmemout2_begin
; CHECK-NEXT: %MemRef_rawmemout2_size = add nsw i32 %MemRef_rawmemout2_end, 1

View File

@ -88,6 +88,6 @@ return: ; preds = %return.loopexit, %e
}
; CHECK: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: xs] [Scalar: 0]
; CHECK-NEXT: [p_0_loaded_from_n] -> { Stmt_9[i0] -> MemRef0[o0] };
; CHECK-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef0[o0] };
; CHECK-NEXT: MayWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: ys] [Scalar: 0]
; CHECK-NEXT: [p_0_loaded_from_n] -> { Stmt_9[i0] -> MemRef1[o0] };
; CHECK-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef1[o0] };