forked from OSchip/llvm-project
Fix a missed opportunity to merge stores.
This commit fixes a missed opportunity in merging consecutive stores. The code that searches for stores skipped the case of stores that directly connect to the root. The comment above the implementation lists this case but the code did not handle it. I found this pattern when looking into the shared_ptr destructor. GCC generates the right sequence. Here is a small repo: int foo(int* buff) { buff[0] = 0; int x = buff[1]; buff[1] = 0; return x; } Differential Revision: https://reviews.llvm.org/D116895
This commit is contained in:
parent
8bed953782
commit
e2cc091a7d
|
@ -17492,6 +17492,10 @@ void DAGCombiner::getStoreMergeCandidates(
|
||||||
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
|
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
|
||||||
TryToAddCandidate(I2);
|
TryToAddCandidate(I2);
|
||||||
}
|
}
|
||||||
|
// Check stores that depend on the root (e.g. Store 3 in the chart above).
|
||||||
|
if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
|
||||||
|
TryToAddCandidate(I);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
|
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s
|
||||||
; CHECK-NOT: memh
|
; CHECK-NOT: memh
|
||||||
; Check that store widening does not merge the two stores.
|
; Check that store widening does not merge the two stores.
|
||||||
|
|
||||||
|
|
|
@ -562,69 +562,59 @@ define dso_local void @testUnalignedLdStPair() {
|
||||||
; LE-PWR9-LABEL: testUnalignedLdStPair:
|
; LE-PWR9-LABEL: testUnalignedLdStPair:
|
||||||
; LE-PWR9: # %bb.0: # %entry
|
; LE-PWR9: # %bb.0: # %entry
|
||||||
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
|
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
|
||||||
; LE-PWR9-NEXT: li r6, 19
|
|
||||||
; LE-PWR9-NEXT: li r4, 11
|
; LE-PWR9-NEXT: li r4, 11
|
||||||
; LE-PWR9-NEXT: li r5, 35
|
|
||||||
; LE-PWR9-NEXT: li r7, 27
|
|
||||||
; LE-PWR9-NEXT: addi r3, r3, g@toc@l
|
; LE-PWR9-NEXT: addi r3, r3, g@toc@l
|
||||||
; LE-PWR9-NEXT: lxvx vs0, r3, r6
|
; LE-PWR9-NEXT: lxvx vs0, r3, r4
|
||||||
; LE-PWR9-NEXT: ldx r4, r3, r4
|
; LE-PWR9-NEXT: li r4, 27
|
||||||
; LE-PWR9-NEXT: ldx r5, r3, r5
|
; LE-PWR9-NEXT: lxvx vs1, r3, r4
|
||||||
; LE-PWR9-NEXT: stdx r4, r3, r6
|
; LE-PWR9-NEXT: li r4, 35
|
||||||
; LE-PWR9-NEXT: stxvx vs0, r3, r7
|
; LE-PWR9-NEXT: stxvx vs1, r3, r4
|
||||||
; LE-PWR9-NEXT: li r7, 43
|
; LE-PWR9-NEXT: li r4, 19
|
||||||
; LE-PWR9-NEXT: stdx r5, r3, r7
|
; LE-PWR9-NEXT: stxvx vs0, r3, r4
|
||||||
; LE-PWR9-NEXT: blr
|
; LE-PWR9-NEXT: blr
|
||||||
;
|
;
|
||||||
; LE-PWR8-LABEL: testUnalignedLdStPair:
|
; LE-PWR8-LABEL: testUnalignedLdStPair:
|
||||||
; LE-PWR8: # %bb.0: # %entry
|
; LE-PWR8: # %bb.0: # %entry
|
||||||
; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
|
; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
|
||||||
; LE-PWR8-NEXT: li r4, 19
|
; LE-PWR8-NEXT: li r4, 27
|
||||||
; LE-PWR8-NEXT: li r5, 11
|
; LE-PWR8-NEXT: li r5, 11
|
||||||
; LE-PWR8-NEXT: li r6, 35
|
; LE-PWR8-NEXT: li r6, 19
|
||||||
; LE-PWR8-NEXT: li r7, 43
|
; LE-PWR8-NEXT: li r8, 35
|
||||||
; LE-PWR8-NEXT: li r8, 27
|
|
||||||
; LE-PWR8-NEXT: addi r3, r3, g@toc@l
|
; LE-PWR8-NEXT: addi r3, r3, g@toc@l
|
||||||
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
|
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
|
||||||
; LE-PWR8-NEXT: ldx r5, r3, r5
|
; LE-PWR8-NEXT: ldx r5, r3, r5
|
||||||
; LE-PWR8-NEXT: ldx r6, r3, r6
|
; LE-PWR8-NEXT: ldx r7, r3, r6
|
||||||
; LE-PWR8-NEXT: stdx r6, r3, r7
|
; LE-PWR8-NEXT: stdx r7, r3, r4
|
||||||
; LE-PWR8-NEXT: stdx r5, r3, r4
|
; LE-PWR8-NEXT: stdx r5, r3, r6
|
||||||
; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
|
; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
|
||||||
; LE-PWR8-NEXT: blr
|
; LE-PWR8-NEXT: blr
|
||||||
;
|
;
|
||||||
; BE-PWR9-LABEL: testUnalignedLdStPair:
|
; BE-PWR9-LABEL: testUnalignedLdStPair:
|
||||||
; BE-PWR9: # %bb.0: # %entry
|
; BE-PWR9: # %bb.0: # %entry
|
||||||
; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
|
; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
|
||||||
; BE-PWR9-NEXT: li r6, 19
|
|
||||||
; BE-PWR9-NEXT: li r4, 11
|
; BE-PWR9-NEXT: li r4, 11
|
||||||
; BE-PWR9-NEXT: li r5, 35
|
|
||||||
; BE-PWR9-NEXT: li r7, 27
|
|
||||||
; BE-PWR9-NEXT: addi r3, r3, g@toc@l
|
; BE-PWR9-NEXT: addi r3, r3, g@toc@l
|
||||||
; BE-PWR9-NEXT: lxvx vs0, r3, r6
|
; BE-PWR9-NEXT: lxvx vs0, r3, r4
|
||||||
; BE-PWR9-NEXT: ldx r4, r3, r4
|
; BE-PWR9-NEXT: li r4, 27
|
||||||
; BE-PWR9-NEXT: ldx r5, r3, r5
|
; BE-PWR9-NEXT: lxvx vs1, r3, r4
|
||||||
; BE-PWR9-NEXT: stdx r4, r3, r6
|
; BE-PWR9-NEXT: li r4, 35
|
||||||
; BE-PWR9-NEXT: stxvx vs0, r3, r7
|
; BE-PWR9-NEXT: stxvx vs1, r3, r4
|
||||||
; BE-PWR9-NEXT: li r7, 43
|
; BE-PWR9-NEXT: li r4, 19
|
||||||
; BE-PWR9-NEXT: stdx r5, r3, r7
|
; BE-PWR9-NEXT: stxvx vs0, r3, r4
|
||||||
; BE-PWR9-NEXT: blr
|
; BE-PWR9-NEXT: blr
|
||||||
;
|
;
|
||||||
; BE-PWR8-LABEL: testUnalignedLdStPair:
|
; BE-PWR8-LABEL: testUnalignedLdStPair:
|
||||||
; BE-PWR8: # %bb.0: # %entry
|
; BE-PWR8: # %bb.0: # %entry
|
||||||
; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
|
; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
|
||||||
; BE-PWR8-NEXT: li r4, 19
|
; BE-PWR8-NEXT: li r4, 11
|
||||||
; BE-PWR8-NEXT: li r5, 11
|
; BE-PWR8-NEXT: li r5, 27
|
||||||
; BE-PWR8-NEXT: li r6, 35
|
|
||||||
; BE-PWR8-NEXT: li r7, 27
|
|
||||||
; BE-PWR8-NEXT: addi r3, r3, g@toc@l
|
; BE-PWR8-NEXT: addi r3, r3, g@toc@l
|
||||||
; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
|
; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
|
||||||
; BE-PWR8-NEXT: ldx r5, r3, r5
|
; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
|
||||||
; BE-PWR8-NEXT: ldx r6, r3, r6
|
; BE-PWR8-NEXT: li r4, 35
|
||||||
; BE-PWR8-NEXT: stxvd2x vs0, r3, r7
|
; BE-PWR8-NEXT: li r5, 19
|
||||||
; BE-PWR8-NEXT: li r7, 43
|
; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
|
||||||
; BE-PWR8-NEXT: stdx r5, r3, r4
|
; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
|
||||||
; BE-PWR8-NEXT: stdx r6, r3, r7
|
|
||||||
; BE-PWR8-NEXT: blr
|
; BE-PWR8-NEXT: blr
|
||||||
entry:
|
entry:
|
||||||
%0 = bitcast <256 x i1>* @g to i8*
|
%0 = bitcast <256 x i1>* @g to i8*
|
||||||
|
|
|
@ -920,3 +920,31 @@ define void @merge_heterogeneous(%struct.C* nocapture %p, %struct.C* nocapture %
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @merge_store_load_store_seq(i32* %buff) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: merge_store_load_store_seq:
|
||||||
|
; CHECK: movl 4(%rdi), %eax
|
||||||
|
; CHECK-NEXT: movq $0, (%rdi)
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
|
||||||
|
store i32 0, i32* %buff, align 4
|
||||||
|
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
|
||||||
|
%0 = load i32, i32* %arrayidx1, align 4
|
||||||
|
store i32 0, i32* %arrayidx1, align 4
|
||||||
|
ret i32 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @merge_store_alias(i32* %buff, i32* %other) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: merge_store_alias:
|
||||||
|
; CHECK: movl $0, (%rdi)
|
||||||
|
; CHECK-NEXT: movl (%rsi), %eax
|
||||||
|
; CHECK-NEXT: movl $0, 4(%rdi)
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
|
||||||
|
store i32 0, i32* %buff, align 4
|
||||||
|
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
|
||||||
|
%0 = load i32, i32* %other, align 4
|
||||||
|
store i32 0, i32* %arrayidx1, align 4
|
||||||
|
ret i32 %0
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue