forked from OSchip/llvm-project
[InstCombine] mempcpy(d,s,n) to memcpy(d,s,n) + n
Summary: The back-end currently expands mempcpy, but the middle-end should work with memcpy instead of mempcpy, to enable more memcpy optimizations. The GCC backend emits mempcpy, so the LLVM backend could form it too, if we know the mempcpy libcall is better than memcpy + n. See https://godbolt.org/z/dOCG96 for a comparison. Reviewers: efriedma, spatel, craig.topper, RKSimon, jdoerfert Reviewed By: efriedma Subscribers: hjl.tools, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65737 llvm-svn: 370593
This commit is contained in:
parent
f8d1d00190
commit
ff0ad3c43d
|
@ -167,6 +167,7 @@ private:
|
|||
Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
|
||||
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
|
||||
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
|
||||
Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
|
||||
|
|
|
@ -998,6 +998,15 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B,
|
|||
return CI->getArgOperand(0);
|
||||
}
|
||||
|
||||
// Fold mempcpy(dst, src, n) into llvm.memcpy(dst, src, n) followed by
// dst + n.  Plain memcpy is what the middle-end's memcpy optimizations
// understand; the mempcpy return value (one past the last byte written)
// is reconstructed with an inbounds GEP on the destination.
Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
  Value *Dst = CI->getArgOperand(0);
  Value *Src = CI->getArgOperand(1);
  Value *Len = CI->getArgOperand(2);
  // mempcpy carries no alignment information, so emit the intrinsic with
  // align 1 on both operands.
  CallInst *MemCpy = B.CreateMemCpy(Dst, /*DstAlign=*/1, Src, /*SrcAlign=*/1, Len);
  // Carry over the call-site attributes (e.g. dereferenceable on the
  // pointer arguments) to the new intrinsic call.
  MemCpy->setAttributes(CI->getAttributes());
  // mempcpy returns dst + n.
  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Len);
}
|
||||
|
||||
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic) {
|
||||
Value *Size = CI->getArgOperand(2);
|
||||
if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
|
||||
|
@ -2624,6 +2633,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
|
|||
return optimizeMemCmp(CI, Builder);
|
||||
case LibFunc_memcpy:
|
||||
return optimizeMemCpy(CI, Builder);
|
||||
case LibFunc_mempcpy:
|
||||
return optimizeMemPCpy(CI, Builder);
|
||||
case LibFunc_memmove:
|
||||
return optimizeMemMove(CI, Builder);
|
||||
case LibFunc_memset:
|
||||
|
|
|
@ -3,17 +3,41 @@
|
|||
|
||||
; Test: mempcpy with a non-constant length is simplified to llvm.memcpy
; plus a "dst + n" inbounds GEP that reproduces mempcpy's return value.
; NOTE(review): this span is a diff view — the first two CHECK lines are
; the pre-commit expectation (call left as @mempcpy) and the following
; three are the post-commit expectation; only one set exists in the
; actual test file.
define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n(
; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 [[N:%.*]])
; CHECK-NEXT: ret i8* [[R]]
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT: ret i8* [[TMP1]]
;
  %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
  ret i8* %r
}
|
||||
|
||||
; Test: call-site attributes on the mempcpy arguments (here
; dereferenceable(16) on the destination) are copied onto the new
; llvm.memcpy intrinsic call by the transform.
define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT: ret i8* [[TMP1]]
;
  %r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
  ret i8* %r
}
|
||||
|
||||
; Test: when the mempcpy return value is unused, only the llvm.memcpy
; remains — the "dst + n" GEP is dead and does not appear in the output.
define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: ret void
;
  call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
  ret void
}
|
||||
|
||||
define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
|
||||
; CHECK-LABEL: @memcpy_small_const_n(
|
||||
; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 8)
|
||||
; CHECK-NEXT: ret i8* [[R]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
|
||||
; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
|
||||
; CHECK-NEXT: ret i8* [[TMP4]]
|
||||
;
|
||||
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
|
||||
ret i8* %r
|
||||
|
@ -21,8 +45,9 @@ define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
|
|||
|
||||
define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
|
||||
; CHECK-LABEL: @memcpy_big_const_n(
|
||||
; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 1024)
|
||||
; CHECK-NEXT: ret i8* [[R]]
|
||||
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(1024) [[D:%.*]], i8* align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
|
||||
; CHECK-NEXT: ret i8* [[TMP1]]
|
||||
;
|
||||
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
|
||||
ret i8* %r
|
||||
|
|
Loading…
Reference in New Issue