diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 1de8f0e3f9d1..940cee3bbd27 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1189,6 +1189,44 @@ static llvm::Value *emitAutoreleaseOfResult(CodeGenFunction &CGF, return CGF.EmitARCAutoreleaseReturnValue(result); } +/// Heuristically search for a dominating store to the return-value slot. +static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { + // If there are multiple uses of the return-value slot, just check + // for something immediately preceding the IP. Sometimes this can + // happen with how we generate implicit-returns; it can also happen + // with noreturn cleanups. + if (!CGF.ReturnValue->hasOneUse()) { + llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock(); + if (IP->empty()) return 0; + llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back()); + if (!store) return 0; + if (store->getPointerOperand() != CGF.ReturnValue) return 0; + assert(!store->isAtomic() && !store->isVolatile()); // see below + return store; + } + + llvm::StoreInst *store = + dyn_cast<llvm::StoreInst>(CGF.ReturnValue->use_back()); + if (!store) return 0; + + // These aren't actually possible for non-coerced returns, and we + // only care about non-coerced returns on this code path. + assert(!store->isAtomic() && !store->isVolatile()); + + // Now do a first-and-dirty dominance check: just walk up the + // single-predecessors chain from the current insertion point. + llvm::BasicBlock *StoreBB = store->getParent(); + llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock(); + while (IP != StoreBB) { + if (!(IP = IP->getSinglePredecessor())) + return 0; + } + + // Okay, the store's basic block dominates the insertion point; we + // can do our thing. + return store; +} + void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { // Functions with no result always return void. 
if (ReturnValue == 0) { @@ -1223,16 +1261,9 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { // The internal return value temp always will have pointer-to-return-type // type, just do a load. - // If the instruction right before the insertion point is a store to the - // return value, we can elide the load, zap the store, and usually zap the - // alloca. - llvm::BasicBlock *InsertBB = Builder.GetInsertBlock(); - llvm::StoreInst *SI = 0; - if (InsertBB->empty() || - !(SI = dyn_cast<llvm::StoreInst>(&InsertBB->back())) || - SI->getPointerOperand() != ReturnValue || SI->isVolatile()) { - RV = Builder.CreateLoad(ReturnValue); - } else { + // If there is a dominating store to ReturnValue, we can elide + // the load, zap the store, and usually zap the alloca. + if (llvm::StoreInst *SI = findDominatingStoreToReturnValue(*this)) { // Get the stored value and nuke the now-dead store. RetDbgLoc = SI->getDebugLoc(); RV = SI->getValueOperand(); @@ -1243,6 +1274,10 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI) { cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent(); ReturnValue = 0; } + + // Otherwise, we have to do a simple load. 
+ } else { + RV = Builder.CreateLoad(ReturnValue); } } else { llvm::Value *V = ReturnValue; diff --git a/clang/test/CodeGenCXX/arm.cpp b/clang/test/CodeGenCXX/arm.cpp index a767f425553b..0a4754b815a5 100644 --- a/clang/test/CodeGenCXX/arm.cpp +++ b/clang/test/CodeGenCXX/arm.cpp @@ -45,24 +45,18 @@ namespace test1 { } // CHECK: define linkonce_odr [[A]]* @_ZN5test11AC1Ei([[A]]* %this, i32 %i) unnamed_addr - // CHECK: [[RET:%.*]] = alloca [[A]]*, align 4 // CHECK: [[THIS:%.*]] = alloca [[A]]*, align 4 // CHECK: store [[A]]* {{.*}}, [[A]]** [[THIS]] // CHECK: [[THIS1:%.*]] = load [[A]]** [[THIS]] - // CHECK: store [[A]]* [[THIS1]], [[A]]** [[RET]] // CHECK: call [[A]]* @_ZN5test11AC2Ei( - // CHECK: [[THIS2:%.*]] = load [[A]]** [[RET]] - // CHECK: ret [[A]]* [[THIS2]] + // CHECK: ret [[A]]* [[THIS1]] // CHECK: define linkonce_odr [[A]]* @_ZN5test11AD1Ev([[A]]* %this) unnamed_addr - // CHECK: [[RET:%.*]] = alloca [[A]]*, align 4 // CHECK: [[THIS:%.*]] = alloca [[A]]*, align 4 // CHECK: store [[A]]* {{.*}}, [[A]]** [[THIS]] // CHECK: [[THIS1:%.*]] = load [[A]]** [[THIS]] - // CHECK: store [[A]]* [[THIS1]], [[A]]** [[RET]] // CHECK: call [[A]]* @_ZN5test11AD2Ev( - // CHECK: [[THIS2:%.*]] = load [[A]]** [[RET]] - // CHECK: ret [[A]]* [[THIS2]] + // CHECK: ret [[A]]* [[THIS1]] } // Awkward virtual cases. 
diff --git a/clang/test/CodeGenObjC/arc.m b/clang/test/CodeGenObjC/arc.m index 8fd1a91cd7de..1a020e2a5291 100644 --- a/clang/test/CodeGenObjC/arc.m +++ b/clang/test/CodeGenObjC/arc.m @@ -13,23 +13,20 @@ void test0(id x) { // CHECK: define i8* @test1(i8* id test1(id x) { - // CHECK: [[RET:%.*]] = alloca i8* - // CHECK-NEXT: [[X:%.*]] = alloca i8* + // CHECK: [[X:%.*]] = alloca i8* // CHECK-NEXT: [[Y:%.*]] = alloca i8* // CHECK-NEXT: alloca i32 // CHECK-NEXT: [[PARM:%.*]] = call i8* @objc_retain(i8* {{%.*}}) // CHECK-NEXT: store i8* [[PARM]], i8** [[X]] // CHECK-NEXT: store i8* null, i8** [[Y]] // CHECK-NEXT: [[T0:%.*]] = load i8** [[Y]] - // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_retain(i8* [[T0]]) - // CHECK-NEXT: store i8* [[T1]], i8** [[RET]] + // CHECK-NEXT: [[RET:%.*]] = call i8* @objc_retain(i8* [[T0]]) // CHECK-NEXT: store i32 // CHECK-NEXT: [[T0:%.*]] = load i8** [[Y]] // CHECK-NEXT: call void @objc_release(i8* [[T0]]) // CHECK-NEXT: [[T1:%.*]] = load i8** [[X]] // CHECK-NEXT: call void @objc_release(i8* [[T1]]) - // CHECK-NEXT: [[T0:%.*]] = load i8** [[RET]] - // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_autoreleaseReturnValue(i8* [[T0]]) + // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_autoreleaseReturnValue(i8* [[RET]]) // CHECK-NEXT: ret i8* [[T1]] id y; return y; @@ -636,8 +633,7 @@ void test22(_Bool cond) { @implementation Test27 - (id) init { return self; } // CHECK: define internal i8* @"\01-[Test27 init]" -// CHECK: [[RET:%.*]] = alloca i8*, -// CHECK-NEXT: [[SELF:%.*]] = alloca [[TEST27:%.*]]*, +// CHECK: [[SELF:%.*]] = alloca [[TEST27:%.*]]*, // CHECK-NEXT: [[CMD:%.*]] = alloca i8*, // CHECK-NEXT: [[DEST:%.*]] = alloca i32 // CHECK-NEXT: store [[TEST27]]* {{%.*}}, [[TEST27]]** [[SELF]] @@ -646,14 +642,12 @@ void test22(_Bool cond) { // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST27]]* [[T0]] to i8* // CHECK-NEXT: [[T2:%.*]] = call i8* @objc_retain(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] -// CHECK-NEXT: [[T2:%.*]] = bitcast -// CHECK-NEXT: 
store i8* [[T2]], i8** [[RET]] +// CHECK-NEXT: [[RET:%.*]] = bitcast // CHECK-NEXT: store i32 {{[0-9]+}}, i32* [[DEST]] // CHECK-NEXT: [[T0:%.*]] = load [[TEST27]]** [[SELF]] // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST27]]* [[T0]] to i8* // CHECK-NEXT: call void @objc_release(i8* [[T1]]) -// CHECK-NEXT: [[T0:%.*]] = load i8** [[RET]] -// CHECK-NEXT: ret i8* [[T0]] +// CHECK-NEXT: ret i8* [[RET]] @end @@ -684,8 +678,7 @@ void test22(_Bool cond) { static id _test29_allocator = 0; - (id) init { // CHECK: define internal i8* @"\01-[Test29 init]"([[TEST29:%.*]]* {{%.*}}, -// CHECK: [[RET:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[SELF:%.*]] = alloca [[TEST29]]*, align 8 +// CHECK: [[SELF:%.*]] = alloca [[TEST29]]*, align 8 // CHECK-NEXT: [[CMD:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[CLEANUP:%.*]] = alloca i32 // CHECK-NEXT: store [[TEST29]]* {{%.*}}, [[TEST29]]** [[SELF]] @@ -715,8 +708,7 @@ static id _test29_allocator = 0; // CHECK-NEXT: [[CALL:%.*]] = bitcast // CHECK-NEXT: [[T1:%.*]] = call i8* @objc_retain(i8* [[CALL]]) nounwind // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] -// CHECK-NEXT: [[T1:%.*]] = bitcast -// CHECK-NEXT: store i8* [[T1]], i8** [[RET]] +// CHECK-NEXT: [[RET:%.*]] = bitcast // CHECK-NEXT: store i32 1, i32* [[CLEANUP]] // Cleanup. @@ -725,14 +717,12 @@ static id _test29_allocator = 0; // CHECK-NEXT: call void @objc_release(i8* [[T1]]) nounwind, !clang.imprecise_release // Return. 
-// CHECK-NEXT: [[T0:%.*]] = load i8** [[RET]] -// CHECK-NEXT: ret i8* [[T0]] +// CHECK-NEXT: ret i8* [[RET]] return [self initWithAllocator: _test29_allocator]; } - (id) initWithAllocator: (id) allocator { // CHECK: define internal i8* @"\01-[Test29 initWithAllocator:]"( -// CHECK: [[RET:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[SELF:%.*]] = alloca [[TEST29]]*, align 8 +// CHECK: [[SELF:%.*]] = alloca [[TEST29]]*, align 8 // CHECK-NEXT: [[CMD:%.*]] = alloca i8*, align 8 // CHECK-NEXT: [[ALLOCATOR:%.*]] = alloca i8*, align 8 // CHECK-NEXT: alloca @@ -774,8 +764,7 @@ static id _test29_allocator = 0; // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST29]]* [[T0]] to i8* // CHECK-NEXT: [[T2:%.*]] = call i8* @objc_retain(i8* [[T1]]) nounwind // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] -// CHECK-NEXT: [[T2:%.*]] = bitcast -// CHECK-NEXT: store i8* [[T2]], i8** [[RET]] +// CHECK-NEXT: [[RET:%.*]] = bitcast // CHECK-NEXT: store i32 1, i32* [[CLEANUP]] // Cleanup. @@ -787,8 +776,7 @@ static id _test29_allocator = 0; // CHECK-NEXT: call void @objc_release(i8* [[T1]]) nounwind, !clang.imprecise_release // Return. 
-// CHECK-NEXT: [[T0:%.*]] = load i8** [[RET]] -// CHECK-NEXT: ret i8* [[T0]] +// CHECK-NEXT: ret i8* [[RET]] self = [super initWithAllocator: allocator]; return self; } @@ -804,8 +792,7 @@ char *helper; } - (id) init { // CHECK: define internal i8* @"\01-[Test30 init]"([[TEST30:%.*]]* {{%.*}}, -// CHECK: [[RET:%.*]] = alloca i8* -// CHECK-NEXT: [[SELF:%.*]] = alloca [[TEST30]]* +// CHECK: [[RET:%.*]] = alloca [[TEST30]]* // CHECK-NEXT: alloca i8* // CHECK-NEXT: alloca i32 // CHECK-NEXT: store [[TEST30]]* {{%.*}}, [[TEST30]]** [[SELF]] @@ -834,8 +821,7 @@ char *helper; // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST30]]* [[T0]] to i8* // CHECK-NEXT: [[T2:%.*]] = call i8* @objc_retain(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] -// CHECK-NEXT: [[T2:%.*]] = bitcast -// CHECK-NEXT: store i8* [[T2]], i8** [[RET]] +// CHECK-NEXT: [[RET:%.*]] = bitcast // CHECK-NEXT: store i32 1 // Cleanup. @@ -844,8 +830,7 @@ char *helper; // CHECK-NEXT: call void @objc_release(i8* [[T1]]) // Epilogue. -// CHECK-NEXT: [[T0:%.*]] = load i8** [[RET]] -// CHECK-NEXT: ret i8* [[T0]] +// CHECK-NEXT: ret i8* [[RET]] self->helper = [self initHelper]; return self; }