forked from OSchip/llvm-project
Teach valuetracking that byval arguments with a specified alignment are
aligned. Teach memcpyopt to not give up all hope when confronted with an underaligned memcpy feeding an overaligned byval. If the *source* of the memcpy can be determined to be adequately aligned, or if it can be forced to be, we can eliminate the memcpy. This addresses PR9794. We now compile the example into: define i32 @f(%struct.p* nocapture byval align 8 %q) nounwind ssp { entry: %call = call i32 @g(%struct.p* byval align 8 %q) nounwind ret i32 %call } in both x86-64 and x86-32 mode. We still don't get a tailcall though, because tailcalls apparently can't handle byval. llvm-svn: 131884
This commit is contained in:
parent
4d37d99798
commit
83791ced7b
|
@ -131,8 +131,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
|
|||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (Argument *A = dyn_cast<Argument>(V)) {
|
||||
// Get alignment information off byval arguments if specified in the IR.
|
||||
if (A->hasByValAttr())
|
||||
if (unsigned Align = A->getParamAlignment())
|
||||
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
|
||||
CountTrailingZeros_32(Align));
|
||||
return;
|
||||
}
|
||||
|
||||
KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything.
|
||||
// Start out not knowing anything.
|
||||
KnownZero.clearAllBits(); KnownOne.clearAllBits();
|
||||
|
||||
if (Depth == MaxDepth || Mask == 0)
|
||||
return; // Limit search depth.
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/Support/IRBuilder.h"
|
||||
|
@ -866,12 +867,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
|
|||
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
|
||||
return false;
|
||||
|
||||
// Get the alignment of the byval. If it is greater than the memcpy, then we
|
||||
// can't do the substitution. If the call doesn't specify the alignment, then
|
||||
// it is some target specific value that we can't know.
|
||||
// Get the alignment of the byval. If the call doesn't specify the alignment,
|
||||
// then it is some target specific value that we can't know.
|
||||
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
|
||||
if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
|
||||
return false;
|
||||
if (ByValAlign == 0) return false;
|
||||
|
||||
// If it is greater than the memcpy, then we check to see if we can force the
|
||||
// source of the memcpy to the alignment we need. If we fail, we bail out.
|
||||
if (MDep->getAlignment() < ByValAlign &&
|
||||
getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
|
||||
return false;
|
||||
|
||||
// Verify that the copied-from memory doesn't change in between the memcpy and
|
||||
// the byval call.
|
||||
|
|
|
@ -109,3 +109,23 @@ define void @test6(i8 *%P) {
|
|||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
|
||||
; PR9794 - Should forward memcpy into byval argument even though the memcpy
|
||||
; isn't itself 8 byte aligned.
|
||||
%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
|
||||
|
||||
define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
|
||||
entry:
|
||||
%agg.tmp = alloca %struct.p, align 4
|
||||
%tmp = bitcast %struct.p* %agg.tmp to i8*
|
||||
%tmp1 = bitcast %struct.p* %q to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
|
||||
%call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
|
||||
ret i32 %call
|
||||
; CHECK: @test7
|
||||
; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
|
||||
}
|
||||
|
||||
declare i32 @g(%struct.p* byval align 8)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue