; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
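
; Check the machine scheduler's load/store clustering on AArch64: FileCheck
; matches the scheduler's -debug-only output (on stderr) for the
; "Cluster ld/st" decisions that allow store-pair (STP) formation.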
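
; stp_i64_scale: four i64 stores at scaled offsets 1-4, written out of order
; in the IR; the scheduler should cluster them into the pairs (1,2) and (3,4).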
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_i64_scale:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1
; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2
; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3
; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4
define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store i64 %v, i64* %arrayidx3
  ret i64 %v
}
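
; stp_i32_scale: the same ascending-offset pattern with i32 stores (STRWui).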
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_i32_scale:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(4): STRWui %1:gpr32, %0:gpr64common, 1
; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 2
; CHECK:SU(2): STRWui %1:gpr32, %0:gpr64common, 3
; CHECK:SU(5): STRWui %1:gpr32, %0:gpr64common, 4
define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
  store i32 %v, i32* %arrayidx3
  ret i32 %v
}
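
; stp_i64_unscale: stores at negative offsets use the unscaled STURXi form;
; they should still be clustered pairwise.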
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_i64_unscale:%bb.0 entry
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:SU(2): STURXi %1:gpr64, %0:gpr64common, -24
; CHECK:SU(3): STURXi %1:gpr64, %0:gpr64common, -8
; CHECK:SU(4): STURXi %1:gpr64, %0:gpr64common, -16
; CHECK:SU(5): STURXi %1:gpr64, %0:gpr64common, -32
define void @stp_i64_unscale(i64* nocapture %P, i64 %v) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
  store i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
  store i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
  store i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
  store i64 %v, i64* %arrayidx3
  ret void
}
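
; stp_i32_unscale: the i32 version of the negative-offset test (STURWi).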
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_i32_unscale:%bb.0 entry
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:SU(2): STURWi %1:gpr32, %0:gpr64common, -12
; CHECK:SU(3): STURWi %1:gpr32, %0:gpr64common, -4
; CHECK:SU(4): STURWi %1:gpr32, %0:gpr64common, -8
; CHECK:SU(5): STURWi %1:gpr32, %0:gpr64common, -16
define void @stp_i32_unscale(i32* nocapture %P, i32 %v) {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
  store i32 %v, i32* %arrayidx
  %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
  store i32 %v, i32* %arrayidx1
  %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
  store i32 %v, i32* %arrayidx2
  %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
  store i32 %v, i32* %arrayidx3
  ret void
}
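
; stp_double: FP stores (STRDui) are clustered the same way as integer stores.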
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_double:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(3): STRDui %1:fpr64, %0:gpr64common, 1
; CHECK:SU(4): STRDui %1:fpr64, %0:gpr64common, 2
; CHECK:SU(2): STRDui %1:fpr64, %0:gpr64common, 3
; CHECK:SU(5): STRDui %1:fpr64, %0:gpr64common, 4
define void @stp_double(double* nocapture %P, double %v) {
entry:
  %arrayidx = getelementptr inbounds double, double* %P, i64 3
  store double %v, double* %arrayidx
  %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
  store double %v, double* %arrayidx1
  %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
  store double %v, double* %arrayidx2
  %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
  store double %v, double* %arrayidx3
  ret void
}
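
; stp_float: the f32 version (STRSui).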
; CHECK:********** MI Scheduling **********
; CHECK-LABEL:stp_float:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(4)
; CHECK:Cluster ld/st SU(2) - SU(5)
; CHECK:SU(3): STRSui %1:fpr32, %0:gpr64common, 1
; CHECK:SU(4): STRSui %1:fpr32, %0:gpr64common, 2
; CHECK:SU(2): STRSui %1:fpr32, %0:gpr64common, 3
; CHECK:SU(5): STRSui %1:fpr32, %0:gpr64common, 4
define void @stp_float(float* nocapture %P, float %v) {
entry:
  %arrayidx = getelementptr inbounds float, float* %P, i64 3
  store float %v, float* %arrayidx
  %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
  store float %v, float* %arrayidx1
  %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
  store float %v, float* %arrayidx2
  %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
  store float %v, float* %arrayidx3
  ret void
}
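
; stp_volatile: volatile stores must not be clustered.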
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_volatile:%bb.0
; CHECK-NOT: Cluster ld/st
; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3 :: (volatile
; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2 :: (volatile
; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1 :: (volatile
; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4 :: (volatile
define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
  store volatile i64 %v, i64* %arrayidx
  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
  store volatile i64 %v, i64* %arrayidx1
  %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
  store volatile i64 %v, i64* %arrayidx2
  %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
  store volatile i64 %v, i64* %arrayidx3
  ret i64 %v
}
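
; stp_i64_with_ld: the stores to a[0..3] should be clustered pairwise even
; though loads and arithmetic are interleaved between them.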
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_i64_with_ld:%bb.0
; CHECK:Cluster ld/st SU(5) - SU(10)
; CHECK:Cluster ld/st SU(15) - SU(20)
; CHECK:SU(5): STRXui %7:gpr64, %0:gpr64common, 0 ::
; CHECK:SU(10): STRXui %12:gpr64, %0:gpr64common, 1 ::
; CHECK:SU(15): STRXui %17:gpr64, %0:gpr64common, 2 ::
; CHECK:SU(20): STRXui %22:gpr64, %0:gpr64common, 3 ::
define void @stp_i64_with_ld(i64* noalias nocapture %a, i64* noalias nocapture readnone %b, i64* noalias nocapture readnone %c) {
entry:
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 8
  %0 = load i64, i64* %arrayidx, align 8
  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 16
  %1 = load i64, i64* %arrayidx3, align 8
  %mul = mul nsw i64 %1, %0
  %2 = load i64, i64* %a, align 8
  %add6 = add nsw i64 %2, %mul
  store i64 %add6, i64* %a, align 8
  %arrayidx.1 = getelementptr inbounds i64, i64* %a, i64 9
  %3 = load i64, i64* %arrayidx.1, align 8
  %arrayidx3.1 = getelementptr inbounds i64, i64* %a, i64 17
  %4 = load i64, i64* %arrayidx3.1, align 8
  %mul.1 = mul nsw i64 %4, %3
  %arrayidx5.1 = getelementptr inbounds i64, i64* %a, i64 1
  %5 = load i64, i64* %arrayidx5.1, align 8
  %add6.1 = add nsw i64 %5, %mul.1
  store i64 %add6.1, i64* %arrayidx5.1, align 8
  %arrayidx.2 = getelementptr inbounds i64, i64* %a, i64 10
  %6 = load i64, i64* %arrayidx.2, align 8
  %arrayidx3.2 = getelementptr inbounds i64, i64* %a, i64 18
  %7 = load i64, i64* %arrayidx3.2, align 8
  %mul.2 = mul nsw i64 %7, %6
  %arrayidx5.2 = getelementptr inbounds i64, i64* %a, i64 2
  %8 = load i64, i64* %arrayidx5.2, align 8
  %add6.2 = add nsw i64 %8, %mul.2
  store i64 %add6.2, i64* %arrayidx5.2, align 8
  %arrayidx.3 = getelementptr inbounds i64, i64* %a, i64 11
  %9 = load i64, i64* %arrayidx.3, align 8
  %arrayidx3.3 = getelementptr inbounds i64, i64* %a, i64 19
  %10 = load i64, i64* %arrayidx3.3, align 8
  %mul.3 = mul nsw i64 %10, %9
  %arrayidx5.3 = getelementptr inbounds i64, i64* %a, i64 3
  %11 = load i64, i64* %arrayidx5.3, align 8
  %add6.3 = add nsw i64 %11, %mul.3
  store i64 %add6.3, i64* %arrayidx5.3, align 8
  ret void
}

; Verify that SU(2) and SU(4) are made preds of SU(3) when SU(3) and SU(5)
; are clustered.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: stp_missing_preds_edges:%bb.0
; CHECK:Cluster ld/st SU(3) - SU(5)
; CHECK: Copy Pred SU(4)
; CHECK: Copy Pred SU(2)
; CHECK:SU(2): %0:gpr64common = COPY $x0
; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 0
; CHECK:SU(4): %3:gpr32common = nsw ADDWri %2:gpr32common, 5, 0
; CHECK:SU(5): STRWui %3:gpr32common, %0:gpr64common, 1
define void @stp_missing_preds_edges(i32* %p, i32 %m, i32 %n) {
entry:
  store i32 %m, i32* %p, align 4
  %add = add nsw i32 %n, 5
  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
  store i32 %add, i32* %arrayidx1, align 4
  ret void
}

; Verify that SU(4) and SU(7) can be clustered even though they have
; different preds.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: cluster_with_different_preds:%bb.0
; CHECK:Cluster ld/st SU(4) - SU(7)
; CHECK:SU(3): STRWui %2:gpr32, %0:gpr64common, 0 ::
; CHECK:SU(4): %3:gpr32 = LDRWui %1:gpr64common, 0 ::
; CHECK:Predecessors:
; CHECK: SU(3): Ord Latency=1 Memory
; CHECK:SU(6): STRBBui %4:gpr32, %1:gpr64common, 4 ::
; CHECK:SU(7): %5:gpr32 = LDRWui %1:gpr64common, 1 ::
; CHECK:Predecessors:
; CHECK:SU(6): Ord Latency=1 Memory
define i32 @cluster_with_different_preds(i32* %p, i32* %q) {
entry:
  store i32 3, i32* %p, align 4
  %0 = load i32, i32* %q, align 4
  %add.ptr = getelementptr inbounds i32, i32* %q, i64 1
  %1 = bitcast i32* %add.ptr to i8*
  store i8 5, i8* %1, align 1
  %2 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %2, %0
  ret i32 %add
}