[LoopIdiom] Be more aggressive when setting alignment in memcpy

Summary:
This change is part of step five in the series of changes to remove alignment argument from
memcpy/memmove/memset in favour of alignment attributes. In particular, this changes the
LoopIdiom pass to cease using the old IRBuilder CreateMemCpy single-alignment APIs in
favour of the new API that allows setting source and destination alignments independently.
This allows us to be slightly more aggressive in setting the alignment of memcpy calls that
loop idiom creates.

Steps:
Step 1) Remove alignment parameter and create alignment parameter attributes for
memcpy/memmove/memset. ( rL322965, rC322964, rL322963 )
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments. ( rL323597 )
Step 3) Update Clang to use the new IRBuilder API. ( rC323617 )
Step 4) Update Polly to use the new IRBuilder API. ( rL323618 )
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment()
and [get|set]SourceAlignment() instead. ( rL323886, rL323891, rL324148, rL324273, rL324278,
rL324384, rL324395, rL324402 )
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.

Reference
   http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
   http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

llvm-svn: 324626
This commit is contained in:
Daniel Neilson 2018-02-08 17:33:08 +00:00
parent fa3e722597
commit fb99a493be
2 changed files with 80 additions and 4 deletions

View File

@ -756,8 +756,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
BECount, NegStride, /*IsLoopMemset=*/true);
MSI->getDestAlignment(), SplatValue, MSI, MSIs,
Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@ -1037,16 +1037,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must neccessarily be unordered
// by previous checks.
if (!SI->isAtomic() && !LI->isAtomic())
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
LoadBasePtr, LI->getAlignment(), NumBytes);
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
if (Align < StoreSize)
return false;

View File

@ -28,6 +28,29 @@ for.end: ; preds = %for.body, %entry
; CHECK-NOT: store
}
; Make sure memset is formed for larger than 1 byte stores, and that the
; alignment of the store is preserved
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
bb.nph: ; preds = %entry
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
store i16 0, i16* %I.0.014, align 2
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
; CHECK-LABEL: @test1_i16(
; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
; CHECK-NOT: store
}
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
@ -169,6 +192,58 @@ for.end: ; preds = %for.body, %entry
; CHECK: ret void
}
;; memcpy formation, check alignment
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
%V = load i32, i32* %I.0.014, align 1
store i32 %V, i32* %DestI, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
; CHECK-LABEL: @test6_dest_align(
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}
;; memcpy formation, check alignment
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
%V = load i32, i32* %I.0.014, align 4
store i32 %V, i32* %DestI, align 1
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
; CHECK-LABEL: @test6_src_align(
; CHECK: %[[Dst]] = bitcast i32* %Dest to i8*
; CHECK: %[[Src]] = bitcast i32* %Base to i8*
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
; CHECK-NOT: store
; CHECK: ret void
}
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.