[ARM] Fix lowering of misaligned memcpy/memset

Currently getOptimalMemOpType returns i32 for large enough sizes without
checking for alignment, leading to poor code generation when misaligned accesses
aren't permitted as we generate a word store then later split it up into byte
stores. This means we inadvertently go over the MaxStoresPerMemcpy limit and for
memset we splat the memset value into a word then immediately split it up
again.

Fix this by leaving it up to FindOptimalMemOpLowering to figure out which type
to use, but also fix a bug there where it wasn't correctly checking if
misaligned memory accesses are allowed.

Differential Revision: https://reviews.llvm.org/D33442

llvm-svn: 303990
This commit is contained in:
John Brawn 2017-05-26 13:59:12 +00:00
parent ba9d8ba82a
commit 9009d2905d
4 changed files with 62 additions and 21 deletions

View File

@ -4779,23 +4779,23 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction()); DAG.getMachineFunction());
if (VT == MVT::Other) { if (VT == MVT::Other) {
if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || // Use the largest integer type whose alignment constraints are satisfied.
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { // We only need to check DstAlign here as SrcAlign is always greater or
VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); // equal to DstAlign (or zero).
} else { VT = MVT::i64;
switch (DstAlign & 7) { while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
case 0: VT = MVT::i64; break; !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
case 4: VT = MVT::i32; break; VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
case 2: VT = MVT::i16; break; assert(VT.isInteger());
default: VT = MVT::i8; break;
}
}
// Find the largest legal integer type.
MVT LVT = MVT::i64; MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT)) while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger()); assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT)) if (VT.bitsGT(LVT))
VT = LVT; VT = LVT;
} }

View File

@ -12147,12 +12147,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
} }
} }
// Lowering to i32/i16 if the size permits.
if (Size >= 4)
return MVT::i32;
else if (Size >= 2)
return MVT::i16;
// Let the target-independent logic figure it out. // Let the target-independent logic figure it out.
return MVT::Other; return MVT::Other;
} }

View File

@ -95,10 +95,7 @@ entry:
; CHECK: movt [[REG7:r[0-9]+]], #22866 ; CHECK: movt [[REG7:r[0-9]+]], #22866
; CHECK: str [[REG7]] ; CHECK: str [[REG7]]
; CHECK-T1-LABEL: t5: ; CHECK-T1-LABEL: t5:
; CHECK-T1: movs [[TREG3:r[0-9]]], ; CHECK-T1: bl _memcpy
; CHECK-T1: strb [[TREG3]],
; CHECK-T1: movs [[TREG4:r[0-9]]],
; CHECK-T1: strb [[TREG4]],
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void ret void
} }

View File

@ -38,6 +38,56 @@ entry:
ret void ret void
} }
; t3: a 4-byte memset whose alignment argument is 1 and whose fill value is the
; truncated loop counter, so the value is not a compile-time constant.
; The 7A checks expect a muls whose result is stored with a single str.
; The 6M checks expect no muls and four strb of the same register.
; NOTE(review): the muls presumably forms the byte splat of the fill value into
; a word, and the 6M configuration presumably disallows misaligned word stores;
; confirm against the RUN lines, which are not visible in this hunk.
define void @t3(i8* %p) {
entry:
; CHECK-7A-LABEL: t3:
; CHECK-7A: muls [[REG:r[0-9]+]],
; CHECK-7A: str [[REG]],
; CHECK-6M-LABEL: t3:
; CHECK-6M-NOT: muls
; CHECK-6M: strb [[REG:r[0-9]+]],
; CHECK-6M: strb [[REG]],
; CHECK-6M: strb [[REG]],
; CHECK-6M: strb [[REG]],
br label %for.body
for.body:
; %i starts at 0 and is incremented once per iteration.
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; The low 8 bits of the counter are used as the memset fill byte.
%0 = trunc i32 %i to i8
; Arguments: destination, fill byte, length 4, alignment 1, i1 false.
call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false)
; Pass the buffer to an external function after it has been written.
call void @something(i8* %p)
%inc = add nuw nsw i32 %i, 1
; Leave the loop once the incremented counter equals 255.
%exitcond = icmp eq i32 %inc, 255
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; t4: a 4-byte memset whose alignment argument is 2 and whose fill value is the
; truncated loop counter, so the value is not a compile-time constant.
; The 7A checks expect a muls whose result is stored with a single str.
; The 6M checks expect a muls whose result is stored with two strh.
; NOTE(review): the muls presumably forms the byte splat of the fill value, and
; the 6M configuration presumably disallows misaligned word stores, so halfword
; stores are the widest usable; confirm against the RUN lines, which are not
; visible in this hunk.
define void @t4(i8* %p) {
entry:
; CHECK-7A-LABEL: t4:
; CHECK-7A: muls [[REG:r[0-9]+]],
; CHECK-7A: str [[REG]],
; CHECK-6M-LABEL: t4:
; CHECK-6M: muls [[REG:r[0-9]+]],
; CHECK-6M: strh [[REG]],
; CHECK-6M: strh [[REG]],
br label %for.body
for.body:
; %i starts at 0 and is incremented once per iteration.
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; The low 8 bits of the counter are used as the memset fill byte.
%0 = trunc i32 %i to i8
; Arguments: destination, fill byte, length 4, alignment 2, i1 false.
call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false)
; Pass the buffer to an external function after it has been written.
call void @something(i8* %p)
%inc = add nuw nsw i32 %i, 1
; Leave the loop once the incremented counter equals 255.
%exitcond = icmp eq i32 %inc, 255
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
declare void @something(i8*) nounwind declare void @something(i8*) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind