forked from OSchip/llvm-project
[ArgPromotion] Change the condition to check the promotion limit
The condition should be 'ArgParts.size() > MaxElements', so that if we have exactly 3 elements in the 'ArgParts' vector, the promotion should be allowed because the 'MaxElement' threshold is not exceeded yet. The default value for 'MaxElement' has been decreased to 2 in order to avoid an actual change in argument promoting behavior. However, this changes byval argument transformation behavior by allowing adding not more than 2 arguments to the function instead of 3 allowed before. Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D124178
This commit is contained in:
parent
3a39bb96ca
commit
6b825e50f7
|
@ -25,7 +25,7 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
|
|||
unsigned MaxElements;
|
||||
|
||||
public:
|
||||
ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
|
||||
ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}
|
||||
|
||||
/// Checks if a type could have padding bytes.
|
||||
static bool isDenselyPacked(Type *Ty, const DataLayout &DL);
|
||||
|
|
|
@ -509,7 +509,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
|
|||
|
||||
// We limit promotion to only promoting up to a fixed number of elements of
|
||||
// the aggregate.
|
||||
if (MaxElements > 0 && ArgParts.size() >= MaxElements) {
|
||||
if (MaxElements > 0 && ArgParts.size() > MaxElements) {
|
||||
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
|
||||
<< "more than " << MaxElements << " parts\n");
|
||||
return false;
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
; RUN: opt -passes=argpromotion -S %s | FileCheck %s
|
||||
|
||||
define internal i32 @callee2(i32* noundef %0) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@callee2
|
||||
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
|
||||
; CHECK-NEXT: [[SUM:%.*]] = add nsw i32 [[P_0]], [[P_1]]
|
||||
; CHECK-NEXT: ret i32 [[SUM]]
|
||||
;
|
||||
%2 = getelementptr inbounds i32, i32* %0, i64 0
|
||||
%3 = load i32, i32* %2, align 4
|
||||
%4 = getelementptr inbounds i32, i32* %0, i64 1
|
||||
%5 = load i32, i32* %4, align 4
|
||||
%6 = add nsw i32 %3, %5
|
||||
ret i32 %6
|
||||
}
|
||||
|
||||
define i32 @caller2(i32 %0, i32 %1) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@caller2
|
||||
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = alloca [2 x i32], align 4
|
||||
; CHECK-NEXT: [[PL_0:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 0
|
||||
; CHECK-NEXT: store i32 [[P_0]], i32* [[PL_0]], align 4
|
||||
; CHECK-NEXT: [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[PL_0]], i64 1
|
||||
; CHECK-NEXT: store i32 [[P_1]], i32* [[PL_1]], align 4
|
||||
; CHECK-NEXT: [[PL_2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[VAL_0:%.*]] = load i32, i32* [[PL_2]], align 4
|
||||
; CHECK-NEXT: [[PL_3:%.*]] = getelementptr i32, i32* [[PL_2]], i64 1
|
||||
; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[PL_3]], align 4
|
||||
; CHECK-NEXT: [[RES:%.*]] = call i32 @callee2(i32 [[VAL_0]], i32 [[VAL_1]])
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
%3 = alloca [2 x i32], align 4
|
||||
%4 = getelementptr inbounds [2 x i32], [2 x i32]* %3, i64 0, i64 0
|
||||
store i32 %0, i32* %4, align 4
|
||||
%5 = getelementptr inbounds i32, i32* %4, i64 1
|
||||
store i32 %1, i32* %5, align 4
|
||||
%6 = getelementptr inbounds [2 x i32], [2 x i32]* %3, i64 0, i64 0
|
||||
%7 = call i32 @callee2(i32* noundef %6)
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
define internal i32 @callee3(i32* noundef %0) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@callee3
|
||||
; CHECK-SAME: (i32* noundef [[P_0:%.*]]) {
|
||||
; CHECK-NEXT: [[PL_0:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 0
|
||||
; CHECK-NEXT: [[VAL_0:%.*]] = load i32, i32* [[PL_0]], align 4
|
||||
; CHECK-NEXT: [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 1
|
||||
; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[PL_1]], align 4
|
||||
; CHECK-NEXT: [[SUM_0:%.*]] = add nsw i32 [[VAL_0]], [[VAL_1]]
|
||||
; CHECK-NEXT: [[PL_2:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 2
|
||||
; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[PL_2]], align 4
|
||||
; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[SUM_0]], [[VAL_2]]
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
%2 = getelementptr inbounds i32, i32* %0, i64 0
|
||||
%3 = load i32, i32* %2, align 4
|
||||
%4 = getelementptr inbounds i32, i32* %0, i64 1
|
||||
%5 = load i32, i32* %4, align 4
|
||||
%6 = add nsw i32 %3, %5
|
||||
%7 = getelementptr inbounds i32, i32* %0, i64 2
|
||||
%8 = load i32, i32* %7, align 4
|
||||
%9 = add nsw i32 %6, %8
|
||||
ret i32 %9
|
||||
}
|
||||
|
||||
define i32 @caller3(i32 %0, i32 %1, i32 %2) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@caller3
|
||||
; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]], i32 [[P_2:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
|
||||
; CHECK-NEXT: [[PL_0:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[TMP1]], i64 0, i64 0
|
||||
; CHECK-NEXT: store i32 [[P_0]], i32* [[PL_0]], align 4
|
||||
; CHECK-NEXT: [[PL_1:%.*]] = getelementptr inbounds i32, i32* [[PL_0]], i64 1
|
||||
; CHECK-NEXT: store i32 [[P_1]], i32* [[PL_1]], align 4
|
||||
; CHECK-NEXT: [[PL_2:%.*]] = getelementptr inbounds i32, i32* [[PL_1]], i64 1
|
||||
; CHECK-NEXT: store i32 [[P_2]], i32* [[PL_2]], align 4
|
||||
; CHECK-NEXT: [[PL_3:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[TMP1]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[RES:%.*]] = call i32 @callee3(i32* noundef [[PL_3]])
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
%4 = alloca [3 x i32], align 4
|
||||
%5 = getelementptr inbounds [3 x i32], [3 x i32]* %4, i64 0, i64 0
|
||||
store i32 %0, i32* %5, align 4
|
||||
%6 = getelementptr inbounds i32, i32* %5, i64 1
|
||||
store i32 %1, i32* %6, align 4
|
||||
%7 = getelementptr inbounds i32, i32* %6, i64 1
|
||||
store i32 %2, i32* %7, align 4
|
||||
%8 = getelementptr inbounds [3 x i32], [3 x i32]* %4, i64 0, i64 0
|
||||
%9 = call i32 @callee3(i32* noundef %8)
|
||||
ret i32 %9
|
||||
}
|
Loading…
Reference in New Issue