Revert "[DAGCombiner] allow load/store merging if pairs can be rotated into place"

This reverts commit 591a3af5c7.
The commit message was cut off and failed to include the review citation.
This commit is contained in:
Sanjay Patel 2020-07-13 08:55:29 -04:00
parent 591a3af5c7
commit f1bbf3acb4
3 changed files with 52 additions and 70 deletions

View File

@ -16541,27 +16541,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
Align RequiredAlignment;
bool NeedRotate = false;
if (LoadNodes.size() == 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
// If the loads are reversed, see if we can rotate the halves into place.
int64_t Offset0 = LoadNodes[0].OffsetFromBase;
int64_t Offset1 = LoadNodes[1].OffsetFromBase;
EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
if (Offset0 - Offset1 == ElementSizeBytes &&
(hasOperation(ISD::ROTL, PairVT) ||
hasOperation(ISD::ROTR, PairVT))) {
std::swap(LoadNodes[0], LoadNodes[1]);
NeedRotate = true;
}
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
@ -16726,18 +16713,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
SDValue StoreOp = NewLoad;
if (NeedRotate) {
unsigned LoadWidth = ElementSizeBytes * 8 * 2;
assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
"Unexpected type for rotate-able load pair");
SDValue RotAmt =
DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
// Target can convert to the identical ROTR if it does not have ROTL.
StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
}
NewStore = DAG.getStore(
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =

View File

@ -95,8 +95,6 @@ exit:
ret void
}
; TODO: rev16?
define void @rotate16_in_place(i8* %p) {
; A53-LABEL: rotate16_in_place:
; A53: // %bb.0:
@ -114,8 +112,6 @@ define void @rotate16_in_place(i8* %p) {
ret void
}
; TODO: rev16?
define void @rotate16(i8* %p, i8* %q) {
; A53-LABEL: rotate16:
; A53: // %bb.0:
@ -138,9 +134,10 @@ define void @rotate16(i8* %p, i8* %q) {
define void @rotate32_in_place(i16* %p) {
; A53-LABEL: rotate32_in_place:
; A53: // %bb.0:
; A53-NEXT: ldr w8, [x0]
; A53-NEXT: ror w8, w8, #16
; A53-NEXT: str w8, [x0]
; A53-NEXT: ldrh w8, [x0, #2]
; A53-NEXT: ldrh w9, [x0]
; A53-NEXT: strh w8, [x0]
; A53-NEXT: strh w9, [x0, #2]
; A53-NEXT: ret
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@ -154,9 +151,10 @@ define void @rotate32_in_place(i16* %p) {
define void @rotate32(i16* %p) {
; A53-LABEL: rotate32:
; A53: // %bb.0:
; A53-NEXT: ldr w8, [x0]
; A53-NEXT: ror w8, w8, #16
; A53-NEXT: str w8, [x0, #84]
; A53-NEXT: ldrh w8, [x0, #2]
; A53-NEXT: ldrh w9, [x0]
; A53-NEXT: strh w8, [x0, #84]
; A53-NEXT: strh w9, [x0, #86]
; A53-NEXT: ret
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@ -169,8 +167,6 @@ define void @rotate32(i16* %p) {
ret void
}
; Prefer paired memops over rotate.
define void @rotate64_in_place(i32* %p) {
; A53-LABEL: rotate64_in_place:
; A53: // %bb.0:
@ -186,8 +182,6 @@ define void @rotate64_in_place(i32* %p) {
ret void
}
; Prefer paired memops over rotate.
define void @rotate64(i32* %p) {
; A53-LABEL: rotate64:
; A53: // %bb.0:

View File

@ -246,7 +246,10 @@ define void @pr43446_1(i8* %a) {
define void @rotate16_in_place(i8* %p) {
; CHECK-LABEL: rotate16_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: rolw $8, (%rdi)
; CHECK-NEXT: movb (%rdi), %al
; CHECK-NEXT: movb 1(%rdi), %cl
; CHECK-NEXT: movb %cl, (%rdi)
; CHECK-NEXT: movb %al, 1(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i8, i8* %p, i64 0
%p1 = getelementptr i8, i8* %p, i64 1
@ -260,9 +263,10 @@ define void @rotate16_in_place(i8* %p) {
define void @rotate16(i8* %p, i8* %q) {
; CHECK-LABEL: rotate16:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: rolw $8, %ax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: movb (%rdi), %al
; CHECK-NEXT: movb 1(%rdi), %cl
; CHECK-NEXT: movb %cl, (%rsi)
; CHECK-NEXT: movb %al, 1(%rsi)
; CHECK-NEXT: retq
%p0 = getelementptr i8, i8* %p, i64 0
%p1 = getelementptr i8, i8* %p, i64 1
@ -278,7 +282,10 @@ define void @rotate16(i8* %p, i8* %q) {
define void @rotate32_in_place(i16* %p) {
; CHECK-LABEL: rotate32_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: roll $16, (%rdi)
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movw %cx, (%rdi)
; CHECK-NEXT: movw %ax, 2(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@ -292,9 +299,10 @@ define void @rotate32_in_place(i16* %p) {
define void @rotate32(i16* %p) {
; CHECK-LABEL: rotate32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: roll $16, %eax
; CHECK-NEXT: movl %eax, 84(%rdi)
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movw %cx, 84(%rdi)
; CHECK-NEXT: movw %ax, 86(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@ -310,7 +318,10 @@ define void @rotate32(i16* %p) {
define void @rotate64_in_place(i32* %p) {
; CHECK-LABEL: rotate64_in_place:
; CHECK: # %bb.0:
; CHECK-NEXT: rolq $32, (%rdi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: movl %ecx, (%rdi)
; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i32, i32* %p, i64 0
%p1 = getelementptr i32, i32* %p, i64 1
@ -324,9 +335,10 @@ define void @rotate64_in_place(i32* %p) {
define void @rotate64(i32* %p) {
; CHECK-LABEL: rotate64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: rolq $32, %rax
; CHECK-NEXT: movq %rax, 8(%rdi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: movl %ecx, 8(%rdi)
; CHECK-NEXT: movl %eax, 12(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i32, i32* %p, i64 0
%p1 = getelementptr i32, i32* %p, i64 1
@ -342,9 +354,10 @@ define void @rotate64(i32* %p) {
define void @rotate64_iterate(i16* %p) {
; CHECK-LABEL: rotate64_iterate:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: rolq $32, %rax
; CHECK-NEXT: movq %rax, 84(%rdi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: movl %ecx, 84(%rdi)
; CHECK-NEXT: movl %eax, 88(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1
@ -365,8 +378,6 @@ define void @rotate64_iterate(i16* %p) {
ret void
}
; TODO: recognize this as 2 rotates?
define void @rotate32_consecutive(i16* %p) {
; CHECK-LABEL: rotate32_consecutive:
; CHECK: # %bb.0:
@ -398,17 +409,17 @@ define void @rotate32_consecutive(i16* %p) {
ret void
}
; Same as above, but now the stores are not all consecutive.
define void @rotate32_twice(i16* %p) {
; CHECK-LABEL: rotate32_twice:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: roll $16, %eax
; CHECK-NEXT: roll $16, %ecx
; CHECK-NEXT: movl %eax, 84(%rdi)
; CHECK-NEXT: movl %ecx, 108(%rdi)
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl 2(%rdi), %ecx
; CHECK-NEXT: movzwl 4(%rdi), %edx
; CHECK-NEXT: movzwl 6(%rdi), %esi
; CHECK-NEXT: movw %cx, 84(%rdi)
; CHECK-NEXT: movw %ax, 86(%rdi)
; CHECK-NEXT: movw %si, 108(%rdi)
; CHECK-NEXT: movw %dx, 110(%rdi)
; CHECK-NEXT: retq
%p0 = getelementptr i16, i16* %p, i64 0
%p1 = getelementptr i16, i16* %p, i64 1