forked from OSchip/llvm-project
- Avoid using floating point stores to implement memset unless the value is zero.
- Do not try to infer GV alignment unless its type is sized. It's not possible to infer alignment if it has an opaque type. llvm-svn: 100118
This commit is contained in:
parent
1a55ef0427
commit
4c014c892a
|
@ -638,7 +638,7 @@ public:
|
|||
/// determining it.
|
||||
virtual EVT getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
SelectionDAG &DAG) const {
|
||||
bool SafeToUseFP, SelectionDAG &DAG) const {
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
|
|
|
@ -3195,9 +3195,9 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
|
|||
/// is below the threshold. It returns the types of the sequence of
|
||||
/// memory ops to perform memset / memcpy by reference.
|
||||
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
SDValue Dst, SDValue Src,
|
||||
unsigned Limit, uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool SafeToUseFP,
|
||||
SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
|
||||
|
@ -3207,7 +3207,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
|||
// the inferred alignment of the source. 'DstAlign', on the other hand, is the
|
||||
// specified alignment of the memory operation. If it is zero, that means
|
||||
// it's possible to change the alignment of the destination.
|
||||
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
|
||||
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG);
|
||||
|
||||
if (VT == MVT::Other) {
|
||||
VT = TLI.getPointerTy();
|
||||
|
@ -3285,9 +3285,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
|||
std::string Str;
|
||||
bool CopyFromStr = isMemSrcFromString(Src, Str);
|
||||
bool isZeroStr = CopyFromStr && Str.empty();
|
||||
if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
(isZeroStr ? 0 : SrcAlign), DAG, TLI))
|
||||
(isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -3369,9 +3369,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
|||
if (Align > SrcAlign)
|
||||
SrcAlign = Align;
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
SrcAlign, DAG, TLI))
|
||||
SrcAlign, true, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -3436,9 +3436,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
|
|||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
|
||||
bool IsZero = isa<ConstantSDNode>(Src) &&
|
||||
cast<ConstantSDNode>(Src)->isNullValue();
|
||||
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
|
||||
Size, (DstAlignCanChange ? 0 : Align), 0,
|
||||
DAG, TLI))
|
||||
IsZero, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -6150,8 +6152,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
|
|||
unsigned Align = GV->getAlignment();
|
||||
if (!Align) {
|
||||
if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
|
||||
const TargetData *TD = TLI.getTargetData();
|
||||
Align = TD->getPreferredAlignment(GVar);
|
||||
if (GV->getType()->getElementType()->isSized()) {
|
||||
const TargetData *TD = TLI.getTargetData();
|
||||
Align = TD->getPreferredAlignment(GVar);
|
||||
}
|
||||
}
|
||||
}
|
||||
return MinAlign(Align, GVOffset);
|
||||
|
|
|
@ -5541,6 +5541,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
|||
|
||||
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool SafeToUseFP,
|
||||
SelectionDAG &DAG) const {
|
||||
if (this->PPCSubTarget.isPPC64()) {
|
||||
return MVT::i64;
|
||||
|
|
|
@ -349,7 +349,7 @@ namespace llvm {
|
|||
|
||||
virtual EVT getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
SelectionDAG &DAG) const;
|
||||
bool SafeToUseFP, SelectionDAG &DAG) const;
|
||||
|
||||
/// getFunctionAlignment - Return the Log2 alignment of this function.
|
||||
virtual unsigned getFunctionAlignment(const Function *F) const;
|
||||
|
|
|
@ -1076,6 +1076,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
|||
EVT
|
||||
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool SafeToUseFP,
|
||||
SelectionDAG &DAG) const {
|
||||
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
|
||||
// linux. This is because the stack realignment code can't handle certain
|
||||
|
@ -1089,9 +1090,10 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
|||
Subtarget->getStackAlignment() >= 16) {
|
||||
if (Subtarget->hasSSE2())
|
||||
return MVT::v4i32;
|
||||
if (Subtarget->hasSSE1())
|
||||
if (SafeToUseFP && Subtarget->hasSSE1())
|
||||
return MVT::v4f32;
|
||||
} else if (Size >= 8 &&
|
||||
} else if (SafeToUseFP &&
|
||||
Size >= 8 &&
|
||||
Subtarget->getStackAlignment() >= 8 &&
|
||||
Subtarget->hasSSE2())
|
||||
return MVT::f64;
|
||||
|
|
|
@ -425,7 +425,7 @@ namespace llvm {
|
|||
/// determining it.
|
||||
virtual EVT getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
SelectionDAG &DAG) const;
|
||||
bool SafeToUseFP, SelectionDAG &DAG) const;
|
||||
|
||||
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
|
||||
/// unaligned memory accesses of the specified type.
|
||||
|
|
|
@ -4,10 +4,18 @@ target triple = "i386"
|
|||
|
||||
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
|
||||
|
||||
define fastcc void @t() nounwind {
|
||||
define fastcc void @t1() nounwind {
|
||||
entry:
|
||||
; CHECK: t:
|
||||
; CHECK: t1:
|
||||
; CHECK: call memset
|
||||
call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
|
||||
unreachable
|
||||
}
|
||||
|
||||
define fastcc void @t2(i8 signext %c) nounwind {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: call memset
|
||||
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
|
||||
unreachable
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue