Fix a missed opportunity to merge stores.

This commit fixes a missed opportunity in merging consecutive stores.
The code that searches for stores skipped the case of stores that
directly connect to the root. The comment above the implementation lists
this case but the code did not handle it. I found this pattern when
looking into the shared_ptr destructor. GCC generates the right
sequence. Here is a small repo:

    int foo(int* buff) {
        buff[0] = 0;
        int x = buff[1];
        buff[1] = 0;
        return x;
    }

Differential Revision: https://reviews.llvm.org/D116895
This commit is contained in:
Nadav Rotem 2022-01-10 12:51:37 -08:00
parent 8bed953782
commit e2cc091a7d
4 changed files with 60 additions and 38 deletions

View File

@ -17492,6 +17492,10 @@ void DAGCombiner::getStoreMergeCandidates(
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
TryToAddCandidate(I2);
}
// Check stores that depend on the root (e.g. Store 3 in the chart above).
if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
TryToAddCandidate(I);
}
}
} else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();

View File

@ -1,4 +1,4 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s
; CHECK-NOT: memh
; Check that store widening does not merge the two stores.

View File

@ -562,69 +562,59 @@ define dso_local void @testUnalignedLdStPair() {
; LE-PWR9-LABEL: testUnalignedLdStPair:
; LE-PWR9: # %bb.0: # %entry
; LE-PWR9-NEXT: addis r3, r2, g@toc@ha
; LE-PWR9-NEXT: li r6, 19
; LE-PWR9-NEXT: li r4, 11
; LE-PWR9-NEXT: li r5, 35
; LE-PWR9-NEXT: li r7, 27
; LE-PWR9-NEXT: addi r3, r3, g@toc@l
; LE-PWR9-NEXT: lxvx vs0, r3, r6
; LE-PWR9-NEXT: ldx r4, r3, r4
; LE-PWR9-NEXT: ldx r5, r3, r5
; LE-PWR9-NEXT: stdx r4, r3, r6
; LE-PWR9-NEXT: stxvx vs0, r3, r7
; LE-PWR9-NEXT: li r7, 43
; LE-PWR9-NEXT: stdx r5, r3, r7
; LE-PWR9-NEXT: lxvx vs0, r3, r4
; LE-PWR9-NEXT: li r4, 27
; LE-PWR9-NEXT: lxvx vs1, r3, r4
; LE-PWR9-NEXT: li r4, 35
; LE-PWR9-NEXT: stxvx vs1, r3, r4
; LE-PWR9-NEXT: li r4, 19
; LE-PWR9-NEXT: stxvx vs0, r3, r4
; LE-PWR9-NEXT: blr
;
; LE-PWR8-LABEL: testUnalignedLdStPair:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: addis r3, r2, g@toc@ha
; LE-PWR8-NEXT: li r4, 19
; LE-PWR8-NEXT: li r4, 27
; LE-PWR8-NEXT: li r5, 11
; LE-PWR8-NEXT: li r6, 35
; LE-PWR8-NEXT: li r7, 43
; LE-PWR8-NEXT: li r8, 27
; LE-PWR8-NEXT: li r6, 19
; LE-PWR8-NEXT: li r8, 35
; LE-PWR8-NEXT: addi r3, r3, g@toc@l
; LE-PWR8-NEXT: lxvd2x vs0, r3, r4
; LE-PWR8-NEXT: ldx r5, r3, r5
; LE-PWR8-NEXT: ldx r6, r3, r6
; LE-PWR8-NEXT: stdx r6, r3, r7
; LE-PWR8-NEXT: stdx r5, r3, r4
; LE-PWR8-NEXT: ldx r7, r3, r6
; LE-PWR8-NEXT: stdx r7, r3, r4
; LE-PWR8-NEXT: stdx r5, r3, r6
; LE-PWR8-NEXT: stxvd2x vs0, r3, r8
; LE-PWR8-NEXT: blr
;
; BE-PWR9-LABEL: testUnalignedLdStPair:
; BE-PWR9: # %bb.0: # %entry
; BE-PWR9-NEXT: addis r3, r2, g@toc@ha
; BE-PWR9-NEXT: li r6, 19
; BE-PWR9-NEXT: li r4, 11
; BE-PWR9-NEXT: li r5, 35
; BE-PWR9-NEXT: li r7, 27
; BE-PWR9-NEXT: addi r3, r3, g@toc@l
; BE-PWR9-NEXT: lxvx vs0, r3, r6
; BE-PWR9-NEXT: ldx r4, r3, r4
; BE-PWR9-NEXT: ldx r5, r3, r5
; BE-PWR9-NEXT: stdx r4, r3, r6
; BE-PWR9-NEXT: stxvx vs0, r3, r7
; BE-PWR9-NEXT: li r7, 43
; BE-PWR9-NEXT: stdx r5, r3, r7
; BE-PWR9-NEXT: lxvx vs0, r3, r4
; BE-PWR9-NEXT: li r4, 27
; BE-PWR9-NEXT: lxvx vs1, r3, r4
; BE-PWR9-NEXT: li r4, 35
; BE-PWR9-NEXT: stxvx vs1, r3, r4
; BE-PWR9-NEXT: li r4, 19
; BE-PWR9-NEXT: stxvx vs0, r3, r4
; BE-PWR9-NEXT: blr
;
; BE-PWR8-LABEL: testUnalignedLdStPair:
; BE-PWR8: # %bb.0: # %entry
; BE-PWR8-NEXT: addis r3, r2, g@toc@ha
; BE-PWR8-NEXT: li r4, 19
; BE-PWR8-NEXT: li r5, 11
; BE-PWR8-NEXT: li r6, 35
; BE-PWR8-NEXT: li r7, 27
; BE-PWR8-NEXT: li r4, 11
; BE-PWR8-NEXT: li r5, 27
; BE-PWR8-NEXT: addi r3, r3, g@toc@l
; BE-PWR8-NEXT: lxvd2x vs0, r3, r4
; BE-PWR8-NEXT: ldx r5, r3, r5
; BE-PWR8-NEXT: ldx r6, r3, r6
; BE-PWR8-NEXT: stxvd2x vs0, r3, r7
; BE-PWR8-NEXT: li r7, 43
; BE-PWR8-NEXT: stdx r5, r3, r4
; BE-PWR8-NEXT: stdx r6, r3, r7
; BE-PWR8-NEXT: lxvd2x vs1, r3, r5
; BE-PWR8-NEXT: li r4, 35
; BE-PWR8-NEXT: li r5, 19
; BE-PWR8-NEXT: stxvd2x vs1, r3, r4
; BE-PWR8-NEXT: stxvd2x vs0, r3, r5
; BE-PWR8-NEXT: blr
entry:
%0 = bitcast <256 x i1>* @g to i8*

View File

@ -920,3 +920,31 @@ define void @merge_heterogeneous(%struct.C* nocapture %p, %struct.C* nocapture %
ret void
}
define i32 @merge_store_load_store_seq(i32* %buff) {
entry:
; CHECK-LABEL: merge_store_load_store_seq:
; CHECK: movl 4(%rdi), %eax
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: retq
store i32 0, i32* %buff, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
%0 = load i32, i32* %arrayidx1, align 4
store i32 0, i32* %arrayidx1, align 4
ret i32 %0
}
define i32 @merge_store_alias(i32* %buff, i32* %other) {
entry:
; CHECK-LABEL: merge_store_alias:
; CHECK: movl $0, (%rdi)
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
store i32 0, i32* %buff, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1
%0 = load i32, i32* %other, align 4
store i32 0, i32* %arrayidx1, align 4
ret i32 %0
}