[ArgPromotion] Make a non-byval promotion attempt first

It makes sense to attempt the non-byval promotion first and then fall
back to the byval one. The non-byval ('usual') promotion is generally
better; for example, it still performs the promotion when a structure
has more elements than 'MaxElements' but not all of them are actually
used in the function.
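
For illustration, a reduced hypothetical case (not taken from the patch,
and assuming the pass's default 'MaxElements' of 3): the structure below
has four elements, so the byval transform would refuse to split it, but
only two of the fields are actually loaded, so the usual promotion can
still rewrite the callee to take two scalars.

%big = type { i32, i32, i32, i32 }

define internal i32 @callee(%big* byval(%big) align 4 %p) {
  ; only fields 0 and 3 are read, so the usual promotion needs just two parts
  %f0.ptr = getelementptr inbounds %big, %big* %p, i32 0, i32 0
  %f0 = load i32, i32* %f0.ptr, align 4
  %f3.ptr = getelementptr inbounds %big, %big* %p, i32 0, i32 3
  %f3 = load i32, i32* %f3.ptr, align 4
  %sum = add i32 %f0, %f3
  ret i32 %sum
}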

Differential Revision: https://reviews.llvm.org/D124514
Pavel Samolysov, 2022-05-12 16:39:26 +02:00 (committed by Nikita Popov)
parent ee51e9795a
commit 098afdb0a0
4 changed files with 104 additions and 42 deletions


@@ -730,7 +730,7 @@ static bool canPaddingBeAccessed(Argument *Arg) {
     Value *V = WorkList.pop_back_val();
     if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
       if (PtrValues.insert(V).second)
-        llvm::append_range(WorkList, V->users());
+        append_range(WorkList, V->users());
     } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
       Stores.push_back(Store);
     } else if (!isa<LoadInst>(V)) {
@@ -848,9 +848,23 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
       }
     }
 
-    // If this is a byval argument, and if the aggregate type is small, just
-    // pass the elements, which is always safe, if the passed value is densely
-    // packed or if we can prove the padding bytes are never accessed.
+    // If we can promote the pointer to its value.
+    SmallVector<OffsetAndArgPart, 4> ArgParts;
+
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+      SmallVector<Type *, 4> Types;
+      for (const auto &Pair : ArgParts)
+        Types.push_back(Pair.second.Ty);
+      if (areTypesABICompatible(Types, *F, TTI)) {
+        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
+        continue;
+      }
+    }
+
+    // Otherwise, if this is a byval argument, and if the aggregate type is
+    // small, just pass the elements, which is always safe, if the passed value
+    // is densely packed or if we can prove the padding bytes are never
+    // accessed.
     //
     // Only handle arguments with specified alignment; if it's unspecified, the
     // actual alignment of the argument is target-specific.
@@ -859,43 +873,32 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
         ByValTy && PtrArg->getParamAlign() &&
         (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) ||
          !canPaddingBeAccessed(PtrArg));
-    if (IsSafeToPromote) {
-      if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
-        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
-          LLVM_DEBUG(dbgs() << "ArgPromotion disables promoting argument '"
-                            << PtrArg->getName()
-                            << "' because it would require adding more"
-                            << " than " << MaxElements
-                            << " arguments to the function.\n");
-          continue;
-        }
-        SmallVector<Type *, 4> Types;
-        append_range(Types, STy->elements());
-
-        // If all the elements are single-value types, we can promote it.
-        bool AllSimple =
-            all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
-
-        // Safe to transform, don't even bother trying to "promote" it.
-        // Passing the elements as a scalar will allow sroa to hack on
-        // the new alloca we introduce.
-        if (AllSimple && areTypesABICompatible(Types, *F, TTI)) {
-          ByValArgsToTransform.insert(PtrArg);
-          continue;
-        }
-      }
-    }
-
-    // Otherwise, see if we can promote the pointer to its value.
-    SmallVector<OffsetAndArgPart, 4> ArgParts;
-
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
-      SmallVector<Type *, 4> Types;
-      for (const auto &Pair : ArgParts)
-        Types.push_back(Pair.second.Ty);
-
-      if (areTypesABICompatible(Types, *F, TTI))
-        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
+    if (!IsSafeToPromote) {
+      LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
+                        << " the argument '" << PtrArg->getName()
+                        << "' because it is not safe.\n");
+      continue;
+    }
+    if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
+      if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+        LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
+                          << " the argument '" << PtrArg->getName()
+                          << "' because it would require adding more"
+                          << " than " << MaxElements
+                          << " arguments to the function.\n");
+        continue;
+      }
+      SmallVector<Type *, 4> Types;
+      append_range(Types, STy->elements());
+
+      // If all the elements are single-value types, we can promote it.
+      bool AllSimple =
+          all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
+
+      // Safe to transform. Passing the elements as a scalar will allow sroa to
+      // hack on the new alloca we introduce.
+      if (AllSimple && areTypesABICompatible(Types, *F, TTI))
+        ByValArgsToTransform.insert(PtrArg);
     }
   }
 
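
As a sketch of the fallback direction (mirroring the updated byval.ll
test below rather than code from the patch): when a field pointer
escapes into memory, findArgParts gives up, but the argument is still a
small, densely packed byval struct, so the byval transform can pass the
elements instead.

%struct.pair = type { i32, i32 }

define internal void @callee(%struct.pair* byval(%struct.pair) align 4 %p) {
  %sink = alloca i32*, align 8
  %f0.ptr = getelementptr %struct.pair, %struct.pair* %p, i32 0, i32 0
  store i32* %f0.ptr, i32** %sink, align 8 ; the escaping pointer defeats the usual promotion
  ret void
}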


@@ -0,0 +1,45 @@
+; RUN: opt -passes=argpromotion -S %s | FileCheck %s
+
+%struct.A = type { float, [12 x i8], i64, [8 x i8] }
+
+define internal float @callee(%struct.A* byval(%struct.A) align 32 %0) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (float [[ARG_0:%.*]], i64 [[ARG_1:%.*]]) {
+; CHECK-NEXT:    [[SUM:%.*]] = fadd float 0.000000e+00, [[ARG_0]]
+; CHECK-NEXT:    [[COEFF:%.*]] = uitofp i64 [[ARG_1]] to float
+; CHECK-NEXT:    [[RES:%.*]] = fmul float [[SUM]], [[COEFF]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %2 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0
+  %3 = load float, float* %2, align 32
+  %4 = fadd float 0.000000e+00, %3
+  %5 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 2
+  %6 = load i64, i64* %5, align 16
+  %7 = uitofp i64 %6 to float
+  %8 = fmul float %4, %7
+  ret float %8
+}
+
+define float @caller(float %0) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (float [[ARG_0:%.*]]) {
+; CHECK-NEXT:    [[TMP_0:%.*]] = alloca %struct.A, align 32
+; CHECK-NEXT:    [[FL_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 0
+; CHECK-NEXT:    store float [[ARG_0]], float* [[FL_PTR_0]], align 32
+; CHECK-NEXT:    [[I64_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 2
+; CHECK-NEXT:    store i64 2, i64* [[I64_PTR_0]], align 16
+; CHECK-NEXT:    [[FL_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 0
+; CHECK-NEXT:    [[FL_VAL:%.*]] = load float, float* [[FL_PTR_1]], align 32
+; CHECK-NEXT:    [[I64_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 2
+; CHECK-NEXT:    [[I64_VAL:%.*]] = load i64, i64* [[I64_PTR_1]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call noundef float @callee(float [[FL_VAL]], i64 [[I64_VAL]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %2 = alloca %struct.A, align 32
+  %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0
+  store float %0, float* %3, align 32
+  %4 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 2
+  store i64 2, i64* %4, align 16
+  %5 = call noundef float @callee(%struct.A* byval(%struct.A) align 32 %2)
+  ret float %5
+}


@@ -21,12 +21,18 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
 ; CHECK-LABEL: define {{[^@]+}}@test_byval
 ; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
 ; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
-; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    [[SINK:%.*]] = alloca i32*, align 8
+; CHECK-NEXT:    [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
+; CHECK-NEXT:    store i32* [[DOT2]], i32** [[SINK]], align 8
 ; CHECK-NEXT:    ret void
 ;
+  %1 = alloca i32*, align 8
+  %2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
+  store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion
   ret void
 }


@@ -53,13 +53,21 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16
 ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
 ; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) {
 ; CHECK-NEXT:    [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
-; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0
 ; CHECK-NEXT:    store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[IDX_P:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    store i64 0, i64* [[IDX_P]], align 8
+; CHECK-NEXT:    [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0
 ; CHECK-NEXT:    [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16
 ; CHECK-NEXT:    ret x86_fp80 [[FP80]]
 ;
   %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
+  %idx_slot = alloca i64, align 8
+  store i64 0, i64* %idx_slot, align 8
+  %idx = load i64, i64* %idx_slot, align 8
+  %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion
   %fp80 = load x86_fp80, x86_fp80* %gep
   ret x86_fp80 %fp80
 }