forked from OSchip/llvm-project
[MCA][LSUnit] Correctly update the internal group flags on store barrier execution. Fixes PR48024.
This is likely to be a regression introduced by my last refactoring of the
LSUnit (commit 5578ec32f9). Before this patch, the
"CurrentStoreBarrierGroupID" index was not correctly reset on store barrier
executions. This was leading to unexpected crashes like the one reported as
PR48024.
This commit is contained in:
parent
55dbb7d823
commit
0e20666db3
|
@ -243,6 +243,8 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
|
|||
CurrentStoreGroupID = 0;
|
||||
if (GroupID == CurrentLoadBarrierGroupID)
|
||||
CurrentLoadBarrierGroupID = 0;
|
||||
if (GroupID == CurrentStoreBarrierGroupID)
|
||||
CurrentStoreBarrierGroupID = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
|
||||
|
||||
# Code snippet taken from PR48024.
|
||||
|
||||
stmxcsr -4(%rsp)
|
||||
movl $-24577, %eax # imm = 0x9FFF
|
||||
andl -4(%rsp), %eax
|
||||
movl %eax, -8(%rsp)
|
||||
ldmxcsr -8(%rsp)
|
||||
retq
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 600
|
||||
# CHECK-NEXT: Total Cycles: 704
|
||||
# CHECK-NEXT: Total uOps: 600
|
||||
|
||||
# CHECK: Dispatch Width: 2
|
||||
# CHECK-NEXT: uOps Per Cycle: 0.85
|
||||
# CHECK-NEXT: IPC: 0.85
|
||||
# CHECK-NEXT: Block RThroughput: 3.0
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 1 1 1.00 * U stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1 1 0.50 movl $-24577, %eax
|
||||
# CHECK-NEXT: 1 4 1.00 * andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 1 3 1.00 * U ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 1 4 1.00 U retq
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - JALU0
|
||||
# CHECK-NEXT: [1] - JALU1
|
||||
# CHECK-NEXT: [2] - JDiv
|
||||
# CHECK-NEXT: [3] - JFPA
|
||||
# CHECK-NEXT: [4] - JFPM
|
||||
# CHECK-NEXT: [5] - JFPU0
|
||||
# CHECK-NEXT: [6] - JFPU1
|
||||
# CHECK-NEXT: [7] - JLAGU
|
||||
# CHECK-NEXT: [8] - JMul
|
||||
# CHECK-NEXT: [9] - JSAGU
|
||||
# CHECK-NEXT: [10] - JSTC
|
||||
# CHECK-NEXT: [11] - JVALU0
|
||||
# CHECK-NEXT: [12] - JVALU1
|
||||
# CHECK-NEXT: [13] - JVIMUL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
# CHECK-NEXT: 1.50 1.50 - - - - - 3.00 - 2.00 - - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movl $-24577, %eax
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - retq
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 01234
|
||||
|
||||
# CHECK: [0,0] DeER . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeER . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeER . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D====eER . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D===eeeER . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeE--R . . . retq
|
||||
# CHECK-NEXT: [1,0] . D===eE--R . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE-----R . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . D====eeeeER. . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D========eER . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . D=======eeeER . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . D=eeeeE-----R . . retq
|
||||
# CHECK-NEXT: [2,0] . .D=======eE--R . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . .DeE---------R . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . D========eeeeER . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D============eER . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D===========eeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . D=eeeeE---------R retq
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 4.3 1.0 1.3 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 4.7 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 5.0 0.3 0.0 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 9.0 0.0 0.0 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 8.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.7 1.7 5.3 retq
|
||||
# CHECK-NEXT: 3 4.8 0.7 1.9 <total>
|
|
@ -0,0 +1,100 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
|
||||
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -timeline-max-iterations=3 < %s | FileCheck %s
|
||||
|
||||
# Code snippet taken from PR48024.
|
||||
|
||||
stmxcsr -4(%rsp)
|
||||
movl $-24577, %eax # imm = 0x9FFF
|
||||
andl -4(%rsp), %eax
|
||||
movl %eax, -8(%rsp)
|
||||
ldmxcsr -8(%rsp)
|
||||
retq
|
||||
|
||||
# CHECK: Iterations: 100
|
||||
# CHECK-NEXT: Instructions: 600
|
||||
# CHECK-NEXT: Total Cycles: 1304
|
||||
# CHECK-NEXT: Total uOps: 1300
|
||||
|
||||
# CHECK: Dispatch Width: 4
|
||||
# CHECK-NEXT: uOps Per Cycle: 1.00
|
||||
# CHECK-NEXT: IPC: 0.46
|
||||
# CHECK-NEXT: Block RThroughput: 3.3
|
||||
|
||||
# CHECK: Instruction Info:
|
||||
# CHECK-NEXT: [1]: #uOps
|
||||
# CHECK-NEXT: [2]: Latency
|
||||
# CHECK-NEXT: [3]: RThroughput
|
||||
# CHECK-NEXT: [4]: MayLoad
|
||||
# CHECK-NEXT: [5]: MayStore
|
||||
# CHECK-NEXT: [6]: HasSideEffects (U)
|
||||
|
||||
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
|
||||
# CHECK-NEXT: 3 2 1.00 * U stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1 1 0.25 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2 6 0.50 * andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 1 1 1.00 * movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 3 7 1.00 * U ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 3 7 1.00 U retq
|
||||
|
||||
# CHECK: Resources:
|
||||
# CHECK-NEXT: [0] - HWDivider
|
||||
# CHECK-NEXT: [1] - HWFPDivider
|
||||
# CHECK-NEXT: [2] - HWPort0
|
||||
# CHECK-NEXT: [3] - HWPort1
|
||||
# CHECK-NEXT: [4] - HWPort2
|
||||
# CHECK-NEXT: [5] - HWPort3
|
||||
# CHECK-NEXT: [6] - HWPort4
|
||||
# CHECK-NEXT: [7] - HWPort5
|
||||
# CHECK-NEXT: [8] - HWPort6
|
||||
# CHECK-NEXT: [9] - HWPort7
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
|
||||
# CHECK-NEXT: - - 1.75 1.74 1.67 1.68 2.00 1.75 1.76 1.65
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
|
||||
# CHECK-NEXT: - - - - 0.30 - 1.00 1.00 - 0.70 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: - - 0.08 0.67 - - - 0.04 0.21 - movl $-24577, %eax
|
||||
# CHECK-NEXT: - - 0.42 0.37 0.35 0.65 - 0.01 0.20 - andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: - - - - 0.05 - 1.00 - - 0.95 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: - - 1.00 0.23 0.34 0.66 - 0.42 0.35 - ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: - - 0.25 0.47 0.63 0.37 - 0.28 1.00 - retq
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: 0123456789 0123456789
|
||||
# CHECK-NEXT: Index 0123456789 0123456789 012
|
||||
|
||||
# CHECK: [0,0] DeeER. . . . . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [0,1] DeE-R. . . . . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [0,2] .DeeeeeeER. . . . . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [0,3] .D======eER . . . . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [0,4] . D=====eeeeeeeER . . . . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [0,5] . DeeeeeeeE----R . . . . . . retq
|
||||
# CHECK-NEXT: [1,0] . D====eeE----R . . . . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [1,1] . DeE---------R . . . . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [1,2] . D=========eeeeeeER . . . . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [1,3] . D===============eER . . . . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [1,4] . .D==============eeeeeeeER. . . . ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [1,5] . . DeeeeeeeE-------------R. . . . retq
|
||||
# CHECK-NEXT: [2,0] . . D=============eeE----R. . . . stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: [2,1] . . DeE------------------R. . . . movl $-24577, %eax
|
||||
# CHECK-NEXT: [2,2] . . D==================eeeeeeER . . andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: [2,3] . . D========================eER . . movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: [2,4] . . D=======================eeeeeeeER ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: [2,5] . . .DeeeeeeeE----------------------R retq
|
||||
|
||||
# CHECK: Average Wait times (based on the timeline view):
|
||||
# CHECK-NEXT: [0]: Executions
|
||||
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
|
||||
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
|
||||
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
|
||||
|
||||
# CHECK: [0] [1] [2] [3]
|
||||
# CHECK-NEXT: 0. 3 6.7 1.0 2.7 stmxcsr -4(%rsp)
|
||||
# CHECK-NEXT: 1. 3 1.0 1.0 9.3 movl $-24577, %eax
|
||||
# CHECK-NEXT: 2. 3 10.0 0.3 0.0 andl -4(%rsp), %eax
|
||||
# CHECK-NEXT: 3. 3 16.0 0.0 0.0 movl %eax, -8(%rsp)
|
||||
# CHECK-NEXT: 4. 3 15.0 0.0 0.0 ldmxcsr -8(%rsp)
|
||||
# CHECK-NEXT: 5. 3 1.0 1.0 13.0 retq
|
||||
# CHECK-NEXT: 3 8.3 0.6 4.2 <total>
|
Loading…
Reference in New Issue