[ArgPromotion] Make a non-byval promotion attempt first

It makes sense to attempt the non-byval promotion first and then fall
back to the byval one. The non-byval ('usual') promotion is generally
better; for example, it still performs the promotion when a structure
has more elements than 'MaxElements' but not all of them are actually
used in the function.
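
For illustration, a reduced hypothetical case (not taken from the patch,
and assuming the pass's default 'MaxElements' of 3): the structure below
has four elements, so the byval transform would refuse to split it, but
only two of the fields are actually loaded, so the usual promotion can
still rewrite the callee to take two scalars.

%big = type { i32, i32, i32, i32 }

define internal i32 @callee(%big* byval(%big) align 4 %p) {
  ; only fields 0 and 3 are read, so the usual promotion needs just two parts
  %f0.ptr = getelementptr inbounds %big, %big* %p, i32 0, i32 0
  %f0 = load i32, i32* %f0.ptr, align 4
  %f3.ptr = getelementptr inbounds %big, %big* %p, i32 0, i32 3
  %f3 = load i32, i32* %f3.ptr, align 4
  %sum = add i32 %f0, %f3
  ret i32 %sum
}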

Differential Revision: https://reviews.llvm.org/D124514
Pavel Samolysov, 2022-05-12 16:39:26 +02:00 (committed by Nikita Popov)
parent ee51e9795a
commit 098afdb0a0
4 changed files with 104 additions and 42 deletions


@@ -730,7 +730,7 @@ static bool canPaddingBeAccessed(Argument *Arg) {
     Value *V = WorkList.pop_back_val();
     if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
       if (PtrValues.insert(V).second)
-        llvm::append_range(WorkList, V->users());
+        append_range(WorkList, V->users());
     } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
       Stores.push_back(Store);
     } else if (!isa<LoadInst>(V)) {
@@ -848,9 +848,23 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
       }
     }
 
-    // If this is a byval argument, and if the aggregate type is small, just
-    // pass the elements, which is always safe, if the passed value is densely
-    // packed or if we can prove the padding bytes are never accessed.
+    // If we can promote the pointer to its value.
+    SmallVector<OffsetAndArgPart, 4> ArgParts;
+
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+      SmallVector<Type *, 4> Types;
+      for (const auto &Pair : ArgParts)
+        Types.push_back(Pair.second.Ty);
+      if (areTypesABICompatible(Types, *F, TTI)) {
+        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
+        continue;
+      }
+    }
+
+    // Otherwise, if this is a byval argument, and if the aggregate type is
+    // small, just pass the elements, which is always safe, if the passed value
+    // is densely packed or if we can prove the padding bytes are never
+    // accessed.
     //
     // Only handle arguments with specified alignment; if it's unspecified, the
     // actual alignment of the argument is target-specific.
@@ -859,43 +873,32 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
         ByValTy && PtrArg->getParamAlign() &&
         (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) ||
          !canPaddingBeAccessed(PtrArg));
-    if (IsSafeToPromote) {
-      if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
-        if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
-          LLVM_DEBUG(dbgs() << "ArgPromotion disables promoting argument '"
-                            << PtrArg->getName()
-                            << "' because it would require adding more"
-                            << " than " << MaxElements
-                            << " arguments to the function.\n");
-          continue;
-        }
-        SmallVector<Type *, 4> Types;
-        append_range(Types, STy->elements());
-
-        // If all the elements are single-value types, we can promote it.
-        bool AllSimple =
-            all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
-
-        // Safe to transform, don't even bother trying to "promote" it.
-        // Passing the elements as a scalar will allow sroa to hack on
-        // the new alloca we introduce.
-        if (AllSimple && areTypesABICompatible(Types, *F, TTI)) {
-          ByValArgsToTransform.insert(PtrArg);
-          continue;
-        }
-      }
-    }
-
-    // Otherwise, see if we can promote the pointer to its value.
-    SmallVector<OffsetAndArgPart, 4> ArgParts;
-
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
-      SmallVector<Type *, 4> Types;
-      for (const auto &Pair : ArgParts)
-        Types.push_back(Pair.second.Ty);
-
-      if (areTypesABICompatible(Types, *F, TTI))
-        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
+    if (!IsSafeToPromote) {
+      LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
+                        << " the argument '" << PtrArg->getName()
+                        << "' because it is not safe.\n");
+      continue;
+    }
+    if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
+      if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+        LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
+                          << " the argument '" << PtrArg->getName()
+                          << "' because it would require adding more"
+                          << " than " << MaxElements
+                          << " arguments to the function.\n");
+        continue;
+      }
+      SmallVector<Type *, 4> Types;
+      append_range(Types, STy->elements());
+
+      // If all the elements are single-value types, we can promote it.
+      bool AllSimple =
+          all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
+
+      // Safe to transform. Passing the elements as a scalar will allow sroa to
+      // hack on the new alloca we introduce.
+      if (AllSimple && areTypesABICompatible(Types, *F, TTI))
+        ByValArgsToTransform.insert(PtrArg);
     }
   }
 
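
As a sketch of the fallback direction (mirroring the updated byval.ll
test below rather than code from the patch): when a field pointer
escapes into memory, findArgParts gives up, but the argument is still a
small, densely packed byval struct, so the byval transform can pass the
elements instead.

%struct.pair = type { i32, i32 }

define internal void @callee(%struct.pair* byval(%struct.pair) align 4 %p) {
  %sink = alloca i32*, align 8
  %f0.ptr = getelementptr %struct.pair, %struct.pair* %p, i32 0, i32 0
  store i32* %f0.ptr, i32** %sink, align 8 ; the escaping pointer defeats the usual promotion
  ret void
}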


@@ -0,0 +1,45 @@
+; RUN: opt -passes=argpromotion -S %s | FileCheck %s
+
+%struct.A = type { float, [12 x i8], i64, [8 x i8] }
+
+define internal float @callee(%struct.A* byval(%struct.A) align 32 %0) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (float [[ARG_0:%.*]], i64 [[ARG_1:%.*]]) {
+; CHECK-NEXT:    [[SUM:%.*]] = fadd float 0.000000e+00, [[ARG_0]]
+; CHECK-NEXT:    [[COEFF:%.*]] = uitofp i64 [[ARG_1]] to float
+; CHECK-NEXT:    [[RES:%.*]] = fmul float [[SUM]], [[COEFF]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %2 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0
+  %3 = load float, float* %2, align 32
+  %4 = fadd float 0.000000e+00, %3
+  %5 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 2
+  %6 = load i64, i64* %5, align 16
+  %7 = uitofp i64 %6 to float
+  %8 = fmul float %4, %7
+  ret float %8
+}
+
+define float @caller(float %0) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (float [[ARG_0:%.*]]) {
+; CHECK-NEXT:    [[TMP_0:%.*]] = alloca %struct.A, align 32
+; CHECK-NEXT:    [[FL_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 0
+; CHECK-NEXT:    store float [[ARG_0]], float* [[FL_PTR_0]], align 32
+; CHECK-NEXT:    [[I64_PTR_0:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[TMP_0]], i32 0, i32 2
+; CHECK-NEXT:    store i64 2, i64* [[I64_PTR_0]], align 16
+; CHECK-NEXT:    [[FL_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 0
+; CHECK-NEXT:    [[FL_VAL:%.*]] = load float, float* [[FL_PTR_1]], align 32
+; CHECK-NEXT:    [[I64_PTR_1:%.*]] = getelementptr %struct.A, %struct.A* [[TMP_0]], i64 0, i32 2
+; CHECK-NEXT:    [[I64_VAL:%.*]] = load i64, i64* [[I64_PTR_1]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call noundef float @callee(float [[FL_VAL]], i64 [[I64_VAL]])
+; CHECK-NEXT:    ret float [[RES]]
+;
+  %2 = alloca %struct.A, align 32
+  %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0
+  store float %0, float* %3, align 32
+  %4 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 2
+  store i64 2, i64* %4, align 16
+  %5 = call noundef float @callee(%struct.A* byval(%struct.A) align 32 %2)
+  ret float %5
+}


@@ -21,12 +21,18 @@ define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
 ; CHECK-LABEL: define {{[^@]+}}@test_byval
 ; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
 ; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
-; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
 ; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
+; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-NEXT:    [[SINK:%.*]] = alloca i32*, align 8
+; CHECK-NEXT:    [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
+; CHECK-NEXT:    store i32* [[DOT2]], i32** [[SINK]], align 8
 ; CHECK-NEXT:    ret void
 ;
+  %1 = alloca i32*, align 8
+  %2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
+  store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion
   ret void
 }


@@ -53,13 +53,21 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16
 ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely
 ; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) {
 ; CHECK-NEXT:    [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16
-; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0
+; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0
 ; CHECK-NEXT:    store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0
+; CHECK-NEXT:    [[IDX_P:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    store i64 0, i64* [[IDX_P]], align 8
+; CHECK-NEXT:    [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0
 ; CHECK-NEXT:    [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16
 ; CHECK-NEXT:    ret x86_fp80 [[FP80]]
 ;
   %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
+  %idx_slot = alloca i64, align 8
+  store i64 0, i64* %idx_slot, align 8
+  %idx = load i64, i64* %idx_slot, align 8
+  %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion
   %fp80 = load x86_fp80, x86_fp80* %gep
   ret x86_fp80 %fp80
 }