forked from OSchip/llvm-project
Revert "[DAGCombiner] allow load/store merging if pairs can be rotated into place"
This reverts commit 591a3af5c7
.
The commit message was cut off and failed to include the review citation.
This commit is contained in:
parent
591a3af5c7
commit
f1bbf3acb4
|
@ -16541,27 +16541,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
|
|||
}
|
||||
|
||||
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
|
||||
// If we have load/store pair instructions and we only have two values,
|
||||
// don't bother merging.
|
||||
Align RequiredAlignment;
|
||||
bool NeedRotate = false;
|
||||
if (LoadNodes.size() == 2) {
|
||||
// If we have load/store pair instructions and we only have two values,
|
||||
// don't bother merging.
|
||||
if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
|
||||
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
|
||||
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
|
||||
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
|
||||
break;
|
||||
}
|
||||
// If the loads are reversed, see if we can rotate the halves into place.
|
||||
int64_t Offset0 = LoadNodes[0].OffsetFromBase;
|
||||
int64_t Offset1 = LoadNodes[1].OffsetFromBase;
|
||||
EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
|
||||
if (Offset0 - Offset1 == ElementSizeBytes &&
|
||||
(hasOperation(ISD::ROTL, PairVT) ||
|
||||
hasOperation(ISD::ROTR, PairVT))) {
|
||||
std::swap(LoadNodes[0], LoadNodes[1]);
|
||||
NeedRotate = true;
|
||||
}
|
||||
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
|
||||
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
|
||||
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
|
||||
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
|
||||
break;
|
||||
}
|
||||
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
||||
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
|
||||
|
@ -16726,18 +16713,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
|
|||
NewLoad = DAG.getLoad(
|
||||
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
|
||||
FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
|
||||
SDValue StoreOp = NewLoad;
|
||||
if (NeedRotate) {
|
||||
unsigned LoadWidth = ElementSizeBytes * 8 * 2;
|
||||
assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
|
||||
"Unexpected type for rotate-able load pair");
|
||||
SDValue RotAmt =
|
||||
DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
|
||||
// Target can convert to the identical ROTR if it does not have ROTL.
|
||||
StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
|
||||
}
|
||||
NewStore = DAG.getStore(
|
||||
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
|
||||
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
|
||||
FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
|
||||
} else { // This must be the truncstore/extload case
|
||||
EVT ExtendedTy =
|
||||
|
|
|
@ -95,8 +95,6 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
; TODO: rev16?
|
||||
|
||||
define void @rotate16_in_place(i8* %p) {
|
||||
; A53-LABEL: rotate16_in_place:
|
||||
; A53: // %bb.0:
|
||||
|
@ -114,8 +112,6 @@ define void @rotate16_in_place(i8* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; TODO: rev16?
|
||||
|
||||
define void @rotate16(i8* %p, i8* %q) {
|
||||
; A53-LABEL: rotate16:
|
||||
; A53: // %bb.0:
|
||||
|
@ -138,9 +134,10 @@ define void @rotate16(i8* %p, i8* %q) {
|
|||
define void @rotate32_in_place(i16* %p) {
|
||||
; A53-LABEL: rotate32_in_place:
|
||||
; A53: // %bb.0:
|
||||
; A53-NEXT: ldr w8, [x0]
|
||||
; A53-NEXT: ror w8, w8, #16
|
||||
; A53-NEXT: str w8, [x0]
|
||||
; A53-NEXT: ldrh w8, [x0, #2]
|
||||
; A53-NEXT: ldrh w9, [x0]
|
||||
; A53-NEXT: strh w8, [x0]
|
||||
; A53-NEXT: strh w9, [x0, #2]
|
||||
; A53-NEXT: ret
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
@ -154,9 +151,10 @@ define void @rotate32_in_place(i16* %p) {
|
|||
define void @rotate32(i16* %p) {
|
||||
; A53-LABEL: rotate32:
|
||||
; A53: // %bb.0:
|
||||
; A53-NEXT: ldr w8, [x0]
|
||||
; A53-NEXT: ror w8, w8, #16
|
||||
; A53-NEXT: str w8, [x0, #84]
|
||||
; A53-NEXT: ldrh w8, [x0, #2]
|
||||
; A53-NEXT: ldrh w9, [x0]
|
||||
; A53-NEXT: strh w8, [x0, #84]
|
||||
; A53-NEXT: strh w9, [x0, #86]
|
||||
; A53-NEXT: ret
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
@ -169,8 +167,6 @@ define void @rotate32(i16* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Prefer paired memops over rotate.
|
||||
|
||||
define void @rotate64_in_place(i32* %p) {
|
||||
; A53-LABEL: rotate64_in_place:
|
||||
; A53: // %bb.0:
|
||||
|
@ -186,8 +182,6 @@ define void @rotate64_in_place(i32* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Prefer paired memops over rotate.
|
||||
|
||||
define void @rotate64(i32* %p) {
|
||||
; A53-LABEL: rotate64:
|
||||
; A53: // %bb.0:
|
||||
|
|
|
@ -246,7 +246,10 @@ define void @pr43446_1(i8* %a) {
|
|||
define void @rotate16_in_place(i8* %p) {
|
||||
; CHECK-LABEL: rotate16_in_place:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: rolw $8, (%rdi)
|
||||
; CHECK-NEXT: movb (%rdi), %al
|
||||
; CHECK-NEXT: movb 1(%rdi), %cl
|
||||
; CHECK-NEXT: movb %cl, (%rdi)
|
||||
; CHECK-NEXT: movb %al, 1(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i8, i8* %p, i64 0
|
||||
%p1 = getelementptr i8, i8* %p, i64 1
|
||||
|
@ -260,9 +263,10 @@ define void @rotate16_in_place(i8* %p) {
|
|||
define void @rotate16(i8* %p, i8* %q) {
|
||||
; CHECK-LABEL: rotate16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: rolw $8, %ax
|
||||
; CHECK-NEXT: movw %ax, (%rsi)
|
||||
; CHECK-NEXT: movb (%rdi), %al
|
||||
; CHECK-NEXT: movb 1(%rdi), %cl
|
||||
; CHECK-NEXT: movb %cl, (%rsi)
|
||||
; CHECK-NEXT: movb %al, 1(%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i8, i8* %p, i64 0
|
||||
%p1 = getelementptr i8, i8* %p, i64 1
|
||||
|
@ -278,7 +282,10 @@ define void @rotate16(i8* %p, i8* %q) {
|
|||
define void @rotate32_in_place(i16* %p) {
|
||||
; CHECK-LABEL: rotate32_in_place:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: roll $16, (%rdi)
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: movzwl 2(%rdi), %ecx
|
||||
; CHECK-NEXT: movw %cx, (%rdi)
|
||||
; CHECK-NEXT: movw %ax, 2(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
@ -292,9 +299,10 @@ define void @rotate32_in_place(i16* %p) {
|
|||
define void @rotate32(i16* %p) {
|
||||
; CHECK-LABEL: rotate32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: roll $16, %eax
|
||||
; CHECK-NEXT: movl %eax, 84(%rdi)
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: movzwl 2(%rdi), %ecx
|
||||
; CHECK-NEXT: movw %cx, 84(%rdi)
|
||||
; CHECK-NEXT: movw %ax, 86(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
@ -310,7 +318,10 @@ define void @rotate32(i16* %p) {
|
|||
define void @rotate64_in_place(i32* %p) {
|
||||
; CHECK-LABEL: rotate64_in_place:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: rolq $32, (%rdi)
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movl 4(%rdi), %ecx
|
||||
; CHECK-NEXT: movl %ecx, (%rdi)
|
||||
; CHECK-NEXT: movl %eax, 4(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i32, i32* %p, i64 0
|
||||
%p1 = getelementptr i32, i32* %p, i64 1
|
||||
|
@ -324,9 +335,10 @@ define void @rotate64_in_place(i32* %p) {
|
|||
define void @rotate64(i32* %p) {
|
||||
; CHECK-LABEL: rotate64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: rolq $32, %rax
|
||||
; CHECK-NEXT: movq %rax, 8(%rdi)
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movl 4(%rdi), %ecx
|
||||
; CHECK-NEXT: movl %ecx, 8(%rdi)
|
||||
; CHECK-NEXT: movl %eax, 12(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i32, i32* %p, i64 0
|
||||
%p1 = getelementptr i32, i32* %p, i64 1
|
||||
|
@ -342,9 +354,10 @@ define void @rotate64(i32* %p) {
|
|||
define void @rotate64_iterate(i16* %p) {
|
||||
; CHECK-LABEL: rotate64_iterate:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq (%rdi), %rax
|
||||
; CHECK-NEXT: rolq $32, %rax
|
||||
; CHECK-NEXT: movq %rax, 84(%rdi)
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movl 4(%rdi), %ecx
|
||||
; CHECK-NEXT: movl %ecx, 84(%rdi)
|
||||
; CHECK-NEXT: movl %eax, 88(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
@ -365,8 +378,6 @@ define void @rotate64_iterate(i16* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; TODO: recognize this as 2 rotates?
|
||||
|
||||
define void @rotate32_consecutive(i16* %p) {
|
||||
; CHECK-LABEL: rotate32_consecutive:
|
||||
; CHECK: # %bb.0:
|
||||
|
@ -398,17 +409,17 @@ define void @rotate32_consecutive(i16* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Same as above, but now the stores are not all consecutive.
|
||||
|
||||
define void @rotate32_twice(i16* %p) {
|
||||
; CHECK-LABEL: rotate32_twice:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl (%rdi), %eax
|
||||
; CHECK-NEXT: movl 4(%rdi), %ecx
|
||||
; CHECK-NEXT: roll $16, %eax
|
||||
; CHECK-NEXT: roll $16, %ecx
|
||||
; CHECK-NEXT: movl %eax, 84(%rdi)
|
||||
; CHECK-NEXT: movl %ecx, 108(%rdi)
|
||||
; CHECK-NEXT: movzwl (%rdi), %eax
|
||||
; CHECK-NEXT: movzwl 2(%rdi), %ecx
|
||||
; CHECK-NEXT: movzwl 4(%rdi), %edx
|
||||
; CHECK-NEXT: movzwl 6(%rdi), %esi
|
||||
; CHECK-NEXT: movw %cx, 84(%rdi)
|
||||
; CHECK-NEXT: movw %ax, 86(%rdi)
|
||||
; CHECK-NEXT: movw %si, 108(%rdi)
|
||||
; CHECK-NEXT: movw %dx, 110(%rdi)
|
||||
; CHECK-NEXT: retq
|
||||
%p0 = getelementptr i16, i16* %p, i64 0
|
||||
%p1 = getelementptr i16, i16* %p, i64 1
|
||||
|
|
Loading…
Reference in New Issue