diff --git a/clang/test/CodeGen/debug-info-block-vars.c b/clang/test/CodeGen/debug-info-block-vars.c index dc522a807951..11c899fa3c81 100644 --- a/clang/test/CodeGen/debug-info-block-vars.c +++ b/clang/test/CodeGen/debug-info-block-vars.c @@ -11,7 +11,10 @@ // CHECK: call void @llvm.dbg.declare(metadata i8** %.block_descriptor.addr, // CHECK-SAME: metadata !DIExpression()) // CHECK-OPT-NOT: alloca -// CHECK-OPT: call void @llvm.dbg.value(metadata i8* %.block_descriptor, +// Since the block address is not used anywhere in this function, +// the optimizer (DeadArgElim) has replaced all the false uses +// (i.e., metadata users) with undef. +// CHECK-OPT: call void @llvm.dbg.value(metadata i8* undef, // CHECK-OPT-SAME: metadata !DIExpression()) void f(void) { a(^{ diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm index 89d9ae9639cc..0e4b98996965 100644 --- a/clang/test/CodeGenObjCXX/nrvo.mm +++ b/clang/test/CodeGenObjCXX/nrvo.mm @@ -22,7 +22,11 @@ struct X { X blocksNRVO() { return ^{ - // CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke + // With the optimizer enabled, the DeadArgElim pass is able to + // mark the block literal address argument as unused and later the + // related block_literal global variable is removed. + // This allows the call to be promoted to a fastcc call. 
+ // CHECK-LABEL: define internal fastcc void @___Z10blocksNRVOv_block_invoke X x; // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: ret void diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 95f6a4f4fb57..a879a0fb30b3 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -268,9 +268,12 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) { if (!Fn.hasExactDefinition()) return false; - // Functions with local linkage should already have been handled, except the - // fragile (variadic) ones which we can improve here. - if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) + // Functions with local linkage should already have been handled, except if + // they are fully alive (e.g., called indirectly) and except for the fragile + // (variadic) ones. In these cases, we may still be able to improve their + // statically known call sites. + if ((Fn.hasLocalLinkage() && !LiveFunctions.count(&Fn)) && + !Fn.getFunctionType()->isVarArg()) return false; // Don't touch naked functions. The assembly might be using an argument, or diff --git a/llvm/test/Transforms/DeadArgElim/fct_ptr.ll b/llvm/test/Transforms/DeadArgElim/fct_ptr.ll new file mode 100644 index 000000000000..2e352666c1f6 --- /dev/null +++ b/llvm/test/Transforms/DeadArgElim/fct_ptr.ll @@ -0,0 +1,67 @@ +; RUN: opt -S %s -deadargelim -o - | FileCheck %s +; In this test, @internal_fct is used by an instruction +; we don't know how to rewrite (the comparison that produces +; %cmp1). +; Because of that use, we used to bail out on removing the +; unused arguments for this function. +; Yet, we should still be able to rewrite the direct calls that are +; statically known, by replacing the related arguments with undef. +; This is what we check on the call that produces %res2. 
+ +define i32 @call_indirect(i32 (i32, i32, i32)* readnone %fct_ptr, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: @call_indirect( +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 (i32, i32, i32)* [[FCT_PTR:%.*]], @external_fct +; CHECK-NEXT: br i1 [[CMP0]], label [[CALL_EXT:%.*]], label [[CHK2:%.*]] +; CHECK: call_ext: +; CHECK-NEXT: [[RES1:%.*]] = tail call i32 @external_fct(i32 undef, i32 [[ARG2:%.*]], i32 undef) +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: chk2: +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 (i32, i32, i32)* [[FCT_PTR]], @internal_fct +; CHECK-NEXT: br i1 [[CMP1]], label [[CALL_INT:%.*]], label [[CALL_OTHER:%.*]] +; CHECK: call_int: +; CHECK-NEXT: [[RES2:%.*]] = tail call i32 @internal_fct(i32 undef, i32 [[ARG2]], i32 undef) +; CHECK-NEXT: br label [[END]] +; CHECK: call_other: +; CHECK-NEXT: [[RES3:%.*]] = tail call i32 @other_fct(i32 [[ARG2]]) +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[FINAL_RES:%.*]] = phi i32 [ [[RES1]], [[CALL_EXT]] ], [ [[RES2]], [[CALL_INT]] ], [ [[RES3]], [[CALL_OTHER]] ] +; CHECK-NEXT: ret i32 [[FINAL_RES]] +; + %cmp0 = icmp eq i32 (i32, i32, i32)* %fct_ptr, @external_fct + br i1 %cmp0, label %call_ext, label %chk2 + +call_ext: + %res1 = tail call i32 @external_fct(i32 %arg1, i32 %arg2, i32 %arg3) + br label %end + +chk2: + %cmp1 = icmp eq i32 (i32, i32, i32)* %fct_ptr, @internal_fct + br i1 %cmp1, label %call_int, label %call_other + +call_int: + %res2 = tail call i32 @internal_fct(i32 %arg1, i32 %arg2, i32 %arg3) + br label %end + +call_other: + %res3 = tail call i32 @other_fct(i32 %arg1, i32 %arg2, i32 %arg3) + br label %end + +end: + %final_res = phi i32 [%res1, %call_ext], [%res2, %call_int], [%res3, %call_other] + ret i32 %final_res +} + + +define i32 @external_fct(i32 %unused_arg1, i32 %arg2, i32 %unused_arg3) { + ret i32 %arg2 +} + +define internal i32 @internal_fct(i32 %unused_arg1, i32 %arg2, i32 %unused_arg3) { + ret i32 %arg2 +} + +define internal i32 @other_fct(i32 %unused_arg1, i32 %arg2, 
i32 %unused_arg3) { + ret i32 %arg2 +} +