forked from OSchip/llvm-project
[ArgPromotion] Make a non-byval promotion attempt first
It makes sense to attempt the non-byval promotion first and then fall back to the byval one. The non-byval ('usual') promotion is generally better; for example, it performs promotion even when a structure has more elements than 'MaxElements', provided that not all of them are actually used in the function. Differential Revision: https://reviews.llvm.org/D124514
This commit is contained in:
parent
ee51e9795a
commit
098afdb0a0
|
@ -730,7 +730,7 @@ static bool canPaddingBeAccessed(Argument *Arg) {
|
|||
Value *V = WorkList.pop_back_val();
|
||||
if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
|
||||
if (PtrValues.insert(V).second)
|
||||
llvm::append_range(WorkList, V->users());
|
||||
append_range(WorkList, V->users());
|
||||
} else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
|
||||
Stores.push_back(Store);
|
||||
} else if (!isa<LoadInst>(V)) {
|
||||
|
@ -848,9 +848,23 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
|
|||
}
|
||||
}
|
||||
|
||||
// If this is a byval argument, and if the aggregate type is small, just
|
||||
// pass the elements, which is always safe, if the passed value is densely
|
||||
// packed or if we can prove the padding bytes are never accessed.
|
||||
// If we can promote the pointer to its value.
|
||||
SmallVector<OffsetAndArgPart, 4> ArgParts;
|
||||
if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
|
||||
SmallVector<Type *, 4> Types;
|
||||
for (const auto &Pair : ArgParts)
|
||||
Types.push_back(Pair.second.Ty);
|
||||
|
||||
if (areTypesABICompatible(Types, *F, TTI)) {
|
||||
ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, if this is a byval argument, and if the aggregate type is
|
||||
// small, just pass the elements, which is always safe, if the passed value
|
||||
// is densely packed or if we can prove the padding bytes are never
|
||||
// accessed.
|
||||
//
|
||||
// Only handle arguments with specified alignment; if it's unspecified, the
|
||||
// actual alignment of the argument is target-specific.
|
||||
|
@ -859,43 +873,32 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
|
|||
ByValTy && PtrArg->getParamAlign() &&
|
||||
(ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) ||
|
||||
!canPaddingBeAccessed(PtrArg));
|
||||
if (IsSafeToPromote) {
|
||||
if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
|
||||
if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
|
||||
LLVM_DEBUG(dbgs() << "ArgPromotion disables promoting argument '"
|
||||
<< PtrArg->getName()
|
||||
<< "' because it would require adding more"
|
||||
<< " than " << MaxElements
|
||||
<< " arguments to the function.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
SmallVector<Type *, 4> Types;
|
||||
append_range(Types, STy->elements());
|
||||
|
||||
// If all the elements are single-value types, we can promote it.
|
||||
bool AllSimple =
|
||||
all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
|
||||
|
||||
// Safe to transform, don't even bother trying to "promote" it.
|
||||
// Passing the elements as a scalar will allow sroa to hack on
|
||||
// the new alloca we introduce.
|
||||
if (AllSimple && areTypesABICompatible(Types, *F, TTI)) {
|
||||
ByValArgsToTransform.insert(PtrArg);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!IsSafeToPromote) {
|
||||
LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
|
||||
<< " the argument '" << PtrArg->getName()
|
||||
<< "' because it is not safe.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, see if we can promote the pointer to its value.
|
||||
SmallVector<OffsetAndArgPart, 4> ArgParts;
|
||||
if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
|
||||
if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
|
||||
if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
|
||||
LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
|
||||
<< " the argument '" << PtrArg->getName()
|
||||
<< "' because it would require adding more"
|
||||
<< " than " << MaxElements
|
||||
<< " arguments to the function.\n");
|
||||
continue;
|
||||
}
|
||||
SmallVector<Type *, 4> Types;
|
||||
for (const auto &Pair : ArgParts)
|
||||
Types.push_back(Pair.second.Ty);
|
||||
append_range(Types, STy->elements());
|
||||
|
||||
if (areTypesABICompatible(Types, *F, TTI))
|
||||
ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
|
||||
// If all the elements are single-value types, we can promote it.
|
||||
bool AllSimple =
|
||||
all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
|
||||
|
||||
// Safe to transform. Passing the elements as a scalar will allow sroa to
|
||||
// hack on the new alloca we introduce.
|
||||
if (AllSimple && areTypesABICompatible(Types, *F, TTI))
|
||||
ByValArgsToTransform.insert(PtrArg);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: opt -passes=argpromotion -S %s | FileCheck %s
|
||||
|
||||
%struct.A = type { float, [12 x i8], i64, [8 x i8] }
|
||||
|
||||
define internal float @callee(%struct.A* byval(%struct.A) align 32 %0) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@callee
|
||||
; CHECK-SAME: (float [[ARG_0:%.*]], i64 [[ARG_1:%.*]]) {
|
||||
; CHECK-NEXT: [[SUM:%.*]] = fadd float 0.000000e+00, [[ARG_0]]
|
||||
; CHECK-NEXT: [[COEFF:%.*]] = uitofp i64 [[ARG_1]] to float
|
||||
; CHECK-NEXT: [[RES:%.*]] = fmul float [[SUM]], [[COEFF]]
|
||||
; CHECK-NEXT: ret float [[RES]]
|
||||
;
|
||||
%2 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0
|
||||
%3 = load float, float* %2, align 32
|
||||
%4 = fadd float 0.000000e+00, %3
|
||||
%5 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 2
|
||||
%6 = load i64, i64* %5, align 16
|
||||
%7 = uitofp i64 %6 to float
|
||||
%8 = fmul float %4, %7
|
||||
ret float %8
|
||||
}
|
||||
|
||||
define float @caller(float %0) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@caller
|
||||
; CHECK-SAME: (float [[ARG_0:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP_0:%.*]] = alloca %struct.A, align 32
|
||||
; CHECK-NEXT: [[FL_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 0
|
||||
; CHECK-NEXT: store float [[ARG_0]], float* [[FL_PTR_0]], align 32
|
||||
; CHECK-NEXT: [[I64_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 2
|
||||
; CHECK-NEXT: store i64 2, i64* [[I64_PTR_0]], align 16
|
||||
; CHECK-NEXT: [[FL_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[FL_VAL:%.*]] = load float, float* [[FL_PTR_1]], align 32
|
||||
; CHECK-NEXT: [[I64_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 2
|
||||
; CHECK-NEXT: [[I64_VAL:%.*]] = load i64, i64* [[I64_PTR_1]], align 16
|
||||
; CHECK-NEXT: [[RES:%.*]] = call noundef float @callee(float [[FL_VAL]], i64 [[I64_VAL]])
|
||||
; CHECK-NEXT: ret float [[RES]]
|
||||
;
|
||||
%2 = alloca %struct.A, align 32
|
||||
%3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0
|
||||
store float %0, float* %3, align 32
|
||||
%4 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 2
|
||||
store i64 2, i64* %4, align 16
|
||||
%5 = call noundef float @callee(%struct.A* byval(%struct.A) align 32 %2)
|
||||
ret float %5
|
||||
}
|
|
@ -21,12 +21,18 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
|
|||
; CHECK-LABEL: define {{[^@]+}}@test_byval
|
||||
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
|
||||
; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
|
||||
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
|
||||
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
|
||||
; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4
|
||||
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
|
||||
; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1
|
||||
; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4
|
||||
; CHECK-NEXT: [[SINK:%.*]] = alloca i32*, align 8
|
||||
; CHECK-NEXT: [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
|
||||
; CHECK-NEXT: store i32* [[DOT2]], i32** [[SINK]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = alloca i32*, align 8
|
||||
%2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
|
||||
store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -53,13 +53,21 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16
|
|||
; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
|
||||
; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) {
|
||||
; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
|
||||
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
|
||||
; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0
|
||||
; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[IDX_P:%.*]] = alloca i64, align 8
|
||||
; CHECK-NEXT: store i64 0, i64* [[IDX_P]], align 8
|
||||
; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8
|
||||
; CHECK-NEXT: [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0
|
||||
; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16
|
||||
; CHECK-NEXT: ret x86_fp80 [[FP80]]
|
||||
;
|
||||
%gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
|
||||
%idx_slot = alloca i64, align 8
|
||||
store i64 0, i64* %idx_slot, align 8
|
||||
%idx = load i64, i64* %idx_slot, align 8
|
||||
%gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion
|
||||
%fp80 = load x86_fp80, x86_fp80* %gep
|
||||
ret x86_fp80 %fp80
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue