From ff0ad3c43d44e36d20787cfe1b4e4404de4c7cff Mon Sep 17 00:00:00 2001
From: David Bolvansky
Date: Sat, 31 Aug 2019 18:19:05 +0000
Subject: [PATCH] [InstCombine] mempcpy(d,s,n) to memcpy(d,s,n) + n

Summary:
The back-end currently expands mempcpy, but the middle-end should work
with memcpy instead of mempcpy to enable more memcpy optimizations.

GCC's back-end emits mempcpy, so the LLVM back-end could form it too,
if we know the mempcpy libcall is better than memcpy + n.
https://godbolt.org/z/dOCG96

Reviewers: efriedma, spatel, craig.topper, RKSimon, jdoerfert

Reviewed By: efriedma

Subscribers: hjl.tools, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65737

llvm-svn: 370593
---
 .../llvm/Transforms/Utils/SimplifyLibCalls.h   |  1 +
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp  | 11 ++++++
 llvm/test/Transforms/InstCombine/mempcpy.ll    | 37 ++++++++++++++++---
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index b7e6224f8556..7fff3e63b31f 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -167,6 +167,7 @@ private:
   Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
   Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 7ff1f4aef237..1f397c0dedbf 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -998,6 +998,15 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B,
   return CI->getArgOperand(0);
 }
 
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Dst = CI->getArgOperand(0);
+  Value *N = CI->getArgOperand(2);
+  // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+  CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+  NewCI->setAttributes(CI->getAttributes());
+  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic) {
   Value *Size = CI->getArgOperand(2);
   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
@@ -2624,6 +2633,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
     return optimizeMemCmp(CI, Builder);
   case LibFunc_memcpy:
     return optimizeMemCpy(CI, Builder);
+  case LibFunc_mempcpy:
+    return optimizeMemPCpy(CI, Builder);
   case LibFunc_memmove:
     return optimizeMemMove(CI, Builder);
   case LibFunc_memset:
diff --git a/llvm/test/Transforms/InstCombine/mempcpy.ll b/llvm/test/Transforms/InstCombine/mempcpy.ll
index ce372d8f37d7..7aa6a31a61ac 100644
--- a/llvm/test/Transforms/InstCombine/mempcpy.ll
+++ b/llvm/test/Transforms/InstCombine/mempcpy.ll
@@ -3,17 +3,41 @@
 
 define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
 ; CHECK-LABEL: @memcpy_nonconst_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 [[N:%.*]])
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
+; CHECK-NEXT:    ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
   ret i8* %r
 }
 
+define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
+; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
+; CHECK-NEXT:    ret i8* [[TMP1]]
+;
+  %r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
+  ret i8* %r
+}
+
+define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
+; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
+  ret void
+}
+
 define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_small_const_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 8)
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
+; CHECK-NEXT:    ret i8* [[TMP4]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
   ret i8* %r
@@ -21,8 +45,9 @@ define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
 
 define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_big_const_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 1024)
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(1024) [[D:%.*]], i8* align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
+; CHECK-NEXT:    ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
   ret i8* %r
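
Note on the semantics this fold relies on: mempcpy is the GNU extension to
memcpy that returns a pointer just past the last byte written rather than the
destination's start, i.e. mempcpy(d, s, n) copies exactly like memcpy(d, s, n)
and then evaluates to (char *)d + n. A minimal C sketch of that contract
(mempcpy_ref is a hypothetical name used only for illustration; glibc exposes
the real function as mempcpy when _GNU_SOURCE is defined):

    #include <string.h>

    /* Illustrative reference implementation of the mempcpy contract:
       perform the same copy as memcpy, but return the end of the written
       range so callers can keep appending. This is exactly the
       memcpy + n shape the fold above produces. */
    static void *mempcpy_ref(void *d, const void *s, size_t n) {
      memcpy(d, s, n);        /* same copy as memcpy */
      return (char *)d + n;   /* ...but return d + n */
    }

This return-value shape is why the transform emits llvm.memcpy followed by an
inbounds GEP of the destination by n: chained callers such as
p = mempcpy_ref(p, a, na); p = mempcpy_ref(p, b, nb); still get the advancing
pointer, while callers that ignore the result (see the unused_retval test) are
left with a plain memcpy that later passes can optimize further.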