[Polly] Remove isConstCall.

The function was intended to catch OpenMP functions such as
get_thread_id(). If matched, the call would be considered synthesizable.

There were a few problems with this:

 * get_thread_id() is not 'const' in the sense of how the gcc manual
   defines it: "do not examine any values except their arguments".
   get_thread_id() reads OpenCL runtime library global state.
   What was intended was probably 'speculable'.

 * isConstCall was implemented using mayReadOrWriteMemory(). 'const' is
   stricter than that, mayReadOrWriteMemory is e.g. true for malloc(),
   since it may only read/write addresses that are considered
   inaccessible from the application. However, malloc is certainly not
   speculable.

 * Values that are isConstCall were not handled consistently throughout
   Polly. In particular, they were not considered for referenced values
   (OpenMP outlining and PollyACC).

Fix by removing special handling for isConstCall entirely.
This commit is contained in:
Michael Kruse 2021-09-26 03:06:19 -05:00
parent e21b0ba8c9
commit 1cea25eec9
6 changed files with 2 additions and 304 deletions

View File

@ -20,24 +20,6 @@ class SCEVConstant;
namespace polly {
class ScopDetection;
/// Check if a call is side-effect free and has only constant arguments.
///
/// Such calls can be re-generated easily, so we do not need to model them
/// as scalar dependences.
///
/// @param Call The call to check.
bool isConstCall(llvm::CallInst *Call);
/// Check if some parameters in the affine expression might hide induction
/// variables. If this is the case, we will try to delinearize the accesses
/// taking into account this information to possibly obtain a memory access
/// with more structure. Currently we assume that each parameter that
/// comes from a function call might depend on a (virtual) induction variable.
/// This covers calls to 'get_global_id' and 'get_local_id' as they commonly
/// arise in OpenCL code, while not catching any false-positives in our current
/// tests.
bool hasIVParams(const llvm::SCEV *Expr);
/// Find the loops referenced from a SCEV expression.
///
/// @param Expr The SCEV expression to scan for loops.

View File

@ -1134,7 +1134,7 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
} else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
Context.Accesses[BP].push_back({Inst, AF});
if (!IsAffine || hasIVParams(AF))
if (!IsAffine)
Context.NonAffineAccesses.insert(
std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
} else if (!AllowNonAffine && !IsAffine) {

View File

@ -1468,37 +1468,6 @@ const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}
// This table of function names is used to translate parameter names in more
// human-readable names. This makes it easier to interpret Polly analysis
// results.
StringMap<std::string> KnownNames = {
{"_Z13get_global_idj", "global_id"},
{"_Z12get_local_idj", "local_id"},
{"_Z15get_global_sizej", "global_size"},
{"_Z14get_local_sizej", "local_size"},
{"_Z12get_work_dimv", "work_dim"},
{"_Z17get_global_offsetj", "global_offset"},
{"_Z12get_group_idj", "group_id"},
{"_Z14get_num_groupsj", "num_groups"},
};
static std::string getCallParamName(CallInst *Call) {
std::string Result;
raw_string_ostream OS(Result);
std::string Name = Call->getCalledFunction()->getName().str();
auto Iterator = KnownNames.find(Name);
if (Iterator != KnownNames.end())
Name = "__" + Iterator->getValue();
OS << Name;
for (auto &Operand : Call->arg_operands()) {
ConstantInt *Op = cast<ConstantInt>(&Operand);
OS << "_" << Op->getValue();
}
OS.flush();
return Result;
}
void Scop::createParameterId(const SCEV *Parameter) {
assert(Parameters.count(Parameter));
assert(!ParameterIds.count(Parameter));
@ -1507,11 +1476,8 @@ void Scop::createParameterId(const SCEV *Parameter) {
if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
Value *Val = ValueParameter->getValue();
CallInst *Call = dyn_cast<CallInst>(Val);
if (Call && isConstCall(Call)) {
ParameterName = getCallParamName(Call);
} else if (UseInstructionNames) {
if (UseInstructionNames) {
// If this parameter references a specific Value and this value has a name
// we use this name as it is likely to be unique and more useful than just
// a number.

View File

@ -117,17 +117,6 @@ raw_ostream &operator<<(raw_ostream &OS, class ValidatorResult &VR) {
return OS;
}
bool polly::isConstCall(llvm::CallInst *Call) {
if (Call->mayReadOrWriteMemory())
return false;
for (auto &Operand : Call->arg_operands())
if (!isa<ConstantInt>(&Operand))
return false;
return true;
}
/// Check if a SCEV is valid in a SCoP.
struct SCEVValidator
: public SCEVVisitor<SCEVValidator, class ValidatorResult> {
@ -353,18 +342,6 @@ public:
return ValidatorResult(SCEVType::PARAM, S);
}
ValidatorResult visitCallInstruction(Instruction *I, const SCEV *S) {
assert(I->getOpcode() == Instruction::Call && "Call instruction expected");
if (R->contains(I)) {
auto Call = cast<CallInst>(I);
if (!isConstCall(Call))
return ValidatorResult(SCEVType::INVALID, S);
}
return ValidatorResult(SCEVType::PARAM, S);
}
ValidatorResult visitLoadInstruction(Instruction *I, const SCEV *S) {
if (R->contains(I) && ILS) {
ILS->insert(cast<LoadInst>(I));
@ -454,8 +431,6 @@ public:
return visitSDivInstruction(I, Expr);
case Instruction::SRem:
return visitSRemInstruction(I, Expr);
case Instruction::Call:
return visitCallInstruction(I, Expr);
default:
return visitGenericInst(I, Expr);
}
@ -470,34 +445,6 @@ public:
}
};
class SCEVHasIVParams {
bool HasIVParams = false;
public:
SCEVHasIVParams() {}
bool follow(const SCEV *S) {
const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(S);
if (!Unknown)
return true;
CallInst *Call = dyn_cast<CallInst>(Unknown->getValue());
if (!Call)
return true;
if (isConstCall(Call)) {
HasIVParams = true;
return false;
}
return true;
}
bool isDone() { return HasIVParams; }
bool hasIVParams() { return HasIVParams; }
};
/// Check whether a SCEV refers to an SSA name defined inside a region.
class SCEVInRegionDependences {
const Region *R;
@ -515,11 +462,6 @@ public:
if (auto Unknown = dyn_cast<SCEVUnknown>(S)) {
Instruction *Inst = dyn_cast<Instruction>(Unknown->getValue());
CallInst *Call = dyn_cast<CallInst>(Unknown->getValue());
if (Call && isConstCall(Call))
return false;
if (Inst) {
// When we invariant load hoist a load, we first make sure that there
// can be no dependences created by it in the Scop region. So, we should
@ -623,13 +565,6 @@ void findValues(const SCEV *Expr, ScalarEvolution &SE,
ST.visitAll(Expr);
}
bool hasIVParams(const SCEV *Expr) {
SCEVHasIVParams HasIVParams;
SCEVTraversal<SCEVHasIVParams> ST(HasIVParams);
ST.visitAll(Expr);
return HasIVParams.hasIVParams();
}
bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R,
llvm::Loop *Scope, bool AllowLoops,
const InvariantLoadsSetTy &ILS) {

View File

@ -1,67 +0,0 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK: Context:
; CHECK-NEXT: [__global_id_0] -> { : -9223372036854775808 <= __global_id_0 <= 9223372036854775807 }
; CHECK-NEXT: Assumed Context:
; CHECK-NEXT: [__global_id_0] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [__global_id_0] -> { : false }
; CHECK: p0: %__global_id_0
; CHECK-NEXT: Arrays {
; CHECK-NEXT: i64 MemRef_A[*]; // Element size 8
; CHECK-NEXT: }
; CHECK-NEXT: Arrays (Bounds as pw_affs) {
; CHECK-NEXT: i64 MemRef_A[*]; // Element size 8
; CHECK-NEXT: }
; CHECK-NEXT: Alias Groups (0):
; CHECK-NEXT: n/a
; CHECK-NEXT: Statements {
; CHECK-NEXT: Stmt_bb
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [__global_id_0] -> { Stmt_bb[] };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [__global_id_0] -> { Stmt_bb[] -> [] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [__global_id_0] -> { Stmt_bb[] -> MemRef_A[__global_id_0] };
; CHECK-NEXT: }
define void @globalid(i64* nocapture %A) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
entry:
br label %next
next:
br i1 true, label %bb, label %exit
bb:
%__global_id_0 = tail call i64 @_Z13get_global_idj(i32 0) #2
%arrayidx = getelementptr inbounds i64, i64* %A, i64 %__global_id_0
store i64 0, i64* %arrayidx, align 8, !tbaa !6
br label %exit
exit:
ret void
}
; Function Attrs: nounwind readnone
declare i64 @_Z13get_global_idj(i32) local_unnamed_addr #1
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-__global_id_0s"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-__global_id_0s"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303846) (llvm/trunk 303834)"}
!2 = !{i32 1}
!3 = !{!"none"}
!4 = !{!"long*"}
!5 = !{!""}
!6 = !{!7, !7, i64 0}
!7 = !{!"long", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C/C++ TBAA"}

View File

@ -1,118 +0,0 @@
; RUN: opt %loadPolly -polly-stmt-granularity=bb -polly-scops -analyze \
; RUN: -polly-detect-full-functions < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Statements {
; CHECK-NEXT: Stmt_entry_split
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] -> [0, 0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] -> MemRef_acc_0_lcssa__phi[] };
; CHECK-NEXT: Stmt_for_inc_lr_ph
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] : N > 0 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] -> [1, 0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] -> MemRef_acc_03__phi[] };
; CHECK-NEXT: Stmt_for_inc
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] : 0 <= i0 < N };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> [2, i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_acc_03__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_acc_03__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_A[__global_id_0, i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_B[i0, __global_id_1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef__lcssa__phi[] };
; CHECK-NEXT: Stmt_for_cond_for_end_crit_edge
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] : N > 0 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> [3, 0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> MemRef__lcssa__phi[] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> MemRef_acc_0_lcssa__phi[] };
; CHECK-NEXT: Stmt_for_end
; CHECK-NEXT: Domain :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> [4, 0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> MemRef_acc_0_lcssa__phi[] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> MemRef_C[__global_id_0, __global_id_1] };
; CHECK-NEXT: }
; Function Attrs: noinline nounwind uwtable
define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
entry:
br label %entry.split
entry.split: ; preds = %entry
%call = tail call i64 @_Z13get_global_idj(i32 0) #3
%call1 = tail call i64 @_Z13get_global_idj(i32 1) #3
%cmp1 = icmp sgt i64 %N, 0
%mul = mul nsw i64 %call, %N
br i1 %cmp1, label %for.inc.lr.ph, label %for.end
for.inc.lr.ph: ; preds = %entry.split
br label %for.inc
for.inc: ; preds = %for.inc.lr.ph, %for.inc
%acc.03 = phi float [ 0.000000e+00, %for.inc.lr.ph ], [ %tmp6, %for.inc ]
%m.02 = phi i64 [ 0, %for.inc.lr.ph ], [ %inc, %for.inc ]
%add = add nsw i64 %m.02, %mul
%arrayidx = getelementptr inbounds float, float* %A, i64 %add
%tmp = load float, float* %arrayidx, align 4
%mul2 = mul nsw i64 %m.02, %N
%add3 = add nsw i64 %mul2, %call1
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %add3
%tmp5 = load float, float* %arrayidx4, align 4
%tmp6 = tail call float @llvm.fmuladd.f32(float %tmp, float %tmp5, float %acc.03)
%inc = add nuw nsw i64 %m.02, 1
%exitcond = icmp ne i64 %inc, %N
br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.inc
%.lcssa = phi float [ %tmp6, %for.inc ]
br label %for.end
for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
%acc.0.lcssa = phi float [ %.lcssa, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry.split ]
%add7 = add nsw i64 %mul, %call1
%arrayidx8 = getelementptr inbounds float, float* %C, i64 %add7
store float %acc.0.lcssa, float* %arrayidx8, align 4
ret void
}
; Function Attrs: nounwind readnone
declare i64 @_Z13get_global_idj(i32) #1
; Function Attrs: nounwind readnone speculatable
declare float @llvm.fmuladd.f32(float, float, float) #2
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (trunk 303846) (llvm/trunk 303834)"}
!2 = !{i32 1, i32 1, i32 1, i32 0}
!3 = !{!"none", !"none", !"none", !"none"}
!4 = !{!"float*", !"float*", !"float*", !"long"}
!5 = !{!"", !"", !"", !""}