forked from OSchip/llvm-project
[SDAG] Relax conditions under stores of loaded values can be merged
Summary: Allow consecutive stores whose values come from consecutive loads to merged in the presense of other uses of the loads. Previously this was disallowed as in general the merged load cannot be shared with the other uses. Merging N stores into 1 may cause as many as N redundant loads. However in the context of caching this should have neglible affect on memory pressure and reduce instruction count making it almost always a win. Fixes PR32086. Reviewers: spatel, jyknight, andreadb, hfinkel, efriedma Reviewed By: efriedma Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30471 llvm-svn: 302712
This commit is contained in:
parent
f69a7c306b
commit
a38c049fc5
|
@ -44,6 +44,10 @@ namespace ISD {
|
||||||
/// EntryToken - This is the marker used to indicate the start of a region.
|
/// EntryToken - This is the marker used to indicate the start of a region.
|
||||||
EntryToken,
|
EntryToken,
|
||||||
|
|
||||||
|
/// DummyNode - Temporary node for node replacement. These nodes
|
||||||
|
/// should not persist beyond their introduction.
|
||||||
|
DummyNode,
|
||||||
|
|
||||||
/// TokenFactor - This node takes multiple tokens as input and produces a
|
/// TokenFactor - This node takes multiple tokens as input and produces a
|
||||||
/// single token result. This is used to represent the fact that the operand
|
/// single token result. This is used to represent the fact that the operand
|
||||||
/// operators are independent of each other.
|
/// operators are independent of each other.
|
||||||
|
|
|
@ -12783,10 +12783,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
||||||
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
|
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
|
||||||
if (!Ld) break;
|
if (!Ld) break;
|
||||||
|
|
||||||
// Loads must only have one use.
|
|
||||||
if (!Ld->hasNUsesOfValue(1, 0))
|
|
||||||
break;
|
|
||||||
|
|
||||||
// The memory operands must not be volatile.
|
// The memory operands must not be volatile.
|
||||||
if (Ld->isVolatile() || Ld->isIndexed())
|
if (Ld->isVolatile() || Ld->isIndexed())
|
||||||
break;
|
break;
|
||||||
|
@ -12795,10 +12791,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
||||||
if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
|
if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// The stored memory type must be the same.
|
|
||||||
if (Ld->getMemoryVT() != MemVT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
|
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
|
||||||
// If this is not the first ptr that we check.
|
// If this is not the first ptr that we check.
|
||||||
if (LdBasePtr.Base.getNode()) {
|
if (LdBasePtr.Base.getNode()) {
|
||||||
|
@ -12930,8 +12922,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
||||||
// Transfer chain users from old loads to the new load.
|
// Transfer chain users from old loads to the new load.
|
||||||
for (unsigned i = 0; i < NumElem; ++i) {
|
for (unsigned i = 0; i < NumElem; ++i) {
|
||||||
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
|
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
|
||||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
|
if (SDValue(Ld, 0).hasOneUse()) {
|
||||||
SDValue(NewLoad.getNode(), 1));
|
// Only the original store used value so just replace chain.
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
|
||||||
|
SDValue(NewLoad.getNode(), 1));
|
||||||
|
} else {
|
||||||
|
// Multiple uses exist. Keep the old load in line with the new
|
||||||
|
// load, i.e. Replace chains using Ld's chain with a
|
||||||
|
// TokenFactor. Create a temporary node to serve as a placer so
|
||||||
|
// we do not replace the reference to original Load's chain in
|
||||||
|
// the TokenFactor.
|
||||||
|
SDValue TokenDummy = DAG.getNode(ISD::DummyNode, SDLoc(Ld), MVT::Other);
|
||||||
|
|
||||||
|
// Replace all references to Load's output chain to TokenDummy
|
||||||
|
CombineTo(Ld, SDValue(Ld, 0), TokenDummy, false);
|
||||||
|
SDValue Token =
|
||||||
|
DAG.getNode(ISD::TokenFactor, SDLoc(Ld), MVT::Other, SDValue(Ld, 1),
|
||||||
|
SDValue(NewLoad.getNode(), 1));
|
||||||
|
// Replace all uses of TokenDummy from itself to Ld's output chain.
|
||||||
|
CombineTo(TokenDummy.getNode(), Token);
|
||||||
|
assert(TokenDummy.use_empty() && "TokenDummy should be unused");
|
||||||
|
AddToWorklist(Ld);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace the all stores with the new store.
|
// Replace the all stores with the new store.
|
||||||
|
|
|
@ -1,18 +1,15 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s
|
; RUN: llc %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -o - | FileCheck %s
|
||||||
|
|
||||||
|
; PR32086
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 {
|
define void @merge_double(double* noalias nocapture %st, double* noalias nocapture readonly %ld) #0 {
|
||||||
; CHECK-LABEL: merge_double:
|
; CHECK-LABEL: merge_double:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
; CHECK-NEXT: movups (%rsi), %xmm0
|
||||||
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
; CHECK-NEXT: movups %xmm0, (%rdi)
|
||||||
; CHECK-NEXT: movsd %xmm0, (%rdi)
|
; CHECK-NEXT: movups %xmm0, 16(%rdi)
|
||||||
; CHECK-NEXT: movsd %xmm1, 8(%rdi)
|
|
||||||
; CHECK-NEXT: movsd %xmm0, 16(%rdi)
|
|
||||||
; CHECK-NEXT: movsd %xmm1, 24(%rdi)
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%ld_idx1 = getelementptr inbounds double, double* %ld, i64 1
|
%ld_idx1 = getelementptr inbounds double, double* %ld, i64 1
|
||||||
%ld0 = load double, double* %ld, align 8, !tbaa !2
|
%ld0 = load double, double* %ld, align 8, !tbaa !2
|
||||||
|
@ -32,12 +29,9 @@ define void @merge_double(double* noalias nocapture %st, double* noalias nocaptu
|
||||||
define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 {
|
define void @merge_loadstore_int(i64* noalias nocapture readonly %p, i64* noalias nocapture %q) local_unnamed_addr #0 {
|
||||||
; CHECK-LABEL: merge_loadstore_int:
|
; CHECK-LABEL: merge_loadstore_int:
|
||||||
; CHECK: # BB#0: # %entry
|
; CHECK: # BB#0: # %entry
|
||||||
; CHECK-NEXT: movq (%rdi), %rax
|
; CHECK-NEXT: movups (%rdi), %xmm0
|
||||||
; CHECK-NEXT: movq 8(%rdi), %rcx
|
; CHECK-NEXT: movups %xmm0, (%rsi)
|
||||||
; CHECK-NEXT: movq %rax, (%rsi)
|
; CHECK-NEXT: movups %xmm0, 16(%rsi)
|
||||||
; CHECK-NEXT: movq %rcx, 8(%rsi)
|
|
||||||
; CHECK-NEXT: movq %rax, 16(%rsi)
|
|
||||||
; CHECK-NEXT: movq %rcx, 24(%rsi)
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%0 = load i64, i64* %p, align 8, !tbaa !1
|
%0 = load i64, i64* %p, align 8, !tbaa !1
|
||||||
|
@ -57,11 +51,9 @@ define i64 @merge_loadstore_int_with_extra_use(i64* noalias nocapture readonly %
|
||||||
; CHECK-LABEL: merge_loadstore_int_with_extra_use:
|
; CHECK-LABEL: merge_loadstore_int_with_extra_use:
|
||||||
; CHECK: # BB#0: # %entry
|
; CHECK: # BB#0: # %entry
|
||||||
; CHECK-NEXT: movq (%rdi), %rax
|
; CHECK-NEXT: movq (%rdi), %rax
|
||||||
; CHECK-NEXT: movq 8(%rdi), %rcx
|
; CHECK-NEXT: movups (%rdi), %xmm0
|
||||||
; CHECK-NEXT: movq %rax, (%rsi)
|
; CHECK-NEXT: movups %xmm0, (%rsi)
|
||||||
; CHECK-NEXT: movq %rcx, 8(%rsi)
|
; CHECK-NEXT: movups %xmm0, 16(%rsi)
|
||||||
; CHECK-NEXT: movq %rax, 16(%rsi)
|
|
||||||
; CHECK-NEXT: movq %rcx, 24(%rsi)
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%0 = load i64, i64* %p, align 8, !tbaa !1
|
%0 = load i64, i64* %p, align 8, !tbaa !1
|
||||||
|
|
Loading…
Reference in New Issue