forked from OSchip/llvm-project
[X86] PUSH/POP 'mem-mem' instructions are not RMW - these are 2 different addresses
This patch adds a 'WriteCopy' [WriteLoad, WriteStore] schedule sequence instead, to better model the behaviour. Found by @andreadb during llvm-mca testing on btver2, which was crashing on "zero uop" WriteRMW-only instructions. llvm-svn: 343708
This commit is contained in:
parent
2016536304
commit
aabd99c27a
|
@ -1210,12 +1210,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
|
|||
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
|
||||
} // isCodeGenOnly = 1, ForceDisassemble = 1
|
||||
} // mayLoad, SchedRW
|
||||
let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
|
||||
let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
|
||||
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
|
||||
OpSize16;
|
||||
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
|
||||
OpSize32, Requires<[Not64BitMode]>;
|
||||
} // mayStore, mayLoad, WriteRMW
|
||||
} // mayStore, mayLoad, SchedRW
|
||||
|
||||
let mayStore = 1, SchedRW = [WriteStore] in {
|
||||
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
|
||||
|
@ -1243,7 +1243,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
|
|||
Requires<[Not64BitMode]>;
|
||||
} // mayStore, SchedRW
|
||||
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
|
||||
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
|
||||
OpSize16;
|
||||
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
|
||||
|
@ -1302,7 +1302,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
|
|||
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
|
||||
} // isCodeGenOnly = 1, ForceDisassemble = 1
|
||||
} // mayLoad, SchedRW
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
|
||||
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
|
||||
OpSize32, Requires<[In64BitMode]>;
|
||||
let mayStore = 1, SchedRW = [WriteStore] in {
|
||||
|
@ -1314,7 +1314,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
|
|||
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
|
||||
} // isCodeGenOnly = 1, ForceDisassemble = 1
|
||||
} // mayStore, SchedRW
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
|
||||
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
|
||||
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
|
||||
OpSize32, Requires<[In64BitMode]>;
|
||||
} // mayLoad, mayStore, SchedRW
|
||||
|
|
|
@ -107,6 +107,7 @@ def WriteLoad : SchedWrite;
|
|||
def WriteStore : SchedWrite;
|
||||
def WriteStoreNT : SchedWrite;
|
||||
def WriteMove : SchedWrite;
|
||||
def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
|
||||
|
||||
// Arithmetic.
|
||||
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
|
||||
|
|
|
@ -1676,9 +1676,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
|
|||
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
|
||||
; SLM-NEXT: #APP
|
||||
; SLM-NEXT: popw %ax # sched: [3:1.00]
|
||||
; SLM-NEXT: popw (%ecx) # sched: [1:1.00]
|
||||
; SLM-NEXT: popw (%ecx) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushw %ax # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw (%ecx) # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw (%ecx) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushw $4095 # imm = 0xFFF
|
||||
; SLM-NEXT: # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw $7 # sched: [1:1.00]
|
||||
|
@ -1766,9 +1766,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
|
|||
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
|
||||
; BTVER2-NEXT: #APP
|
||||
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
|
||||
; BTVER2-NEXT: popw (%ecx) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: popw (%ecx) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushw %ax # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw (%ecx) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw (%ecx) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
|
||||
; BTVER2-NEXT: # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
|
||||
|
@ -1828,9 +1828,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
|
|||
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
|
||||
; SLM-NEXT: #APP
|
||||
; SLM-NEXT: popl %eax # sched: [3:1.00]
|
||||
; SLM-NEXT: popl (%ecx) # sched: [1:1.00]
|
||||
; SLM-NEXT: popl (%ecx) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushl %eax # sched: [1:1.00]
|
||||
; SLM-NEXT: pushl (%ecx) # sched: [1:1.00]
|
||||
; SLM-NEXT: pushl (%ecx) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushl $4095 # imm = 0xFFF
|
||||
; SLM-NEXT: # sched: [1:1.00]
|
||||
; SLM-NEXT: pushl $7 # sched: [1:1.00]
|
||||
|
@ -1918,9 +1918,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
|
|||
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
|
||||
; BTVER2-NEXT: #APP
|
||||
; BTVER2-NEXT: popl %eax # sched: [5:1.00]
|
||||
; BTVER2-NEXT: popl (%ecx) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: popl (%ecx) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushl %eax # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushl (%ecx) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushl (%ecx) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushl $4095 # imm = 0xFFF
|
||||
; BTVER2-NEXT: # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushl $7 # sched: [1:1.00]
|
||||
|
@ -1933,7 +1933,7 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
|
|||
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
|
||||
; ZNVER1-NEXT: #APP
|
||||
; ZNVER1-NEXT: popl %eax # sched: [8:0.50]
|
||||
; ZNVER1-NEXT: popl (%ecx) # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00]
|
||||
; ZNVER1-NEXT: pushl %eax # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50]
|
||||
; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF
|
||||
|
|
|
@ -9648,9 +9648,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
|
|||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: #APP
|
||||
; SLM-NEXT: popw %ax # sched: [3:1.00]
|
||||
; SLM-NEXT: popw (%rsi) # sched: [1:1.00]
|
||||
; SLM-NEXT: popw (%rsi) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushw %di # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw (%rsi) # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw (%rsi) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushw $4095 # imm = 0xFFF
|
||||
; SLM-NEXT: # sched: [1:1.00]
|
||||
; SLM-NEXT: pushw $7 # sched: [1:1.00]
|
||||
|
@ -9726,9 +9726,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
|
|||
; BTVER2: # %bb.0:
|
||||
; BTVER2-NEXT: #APP
|
||||
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
|
||||
; BTVER2-NEXT: popw (%rsi) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: popw (%rsi) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushw %di # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw (%rsi) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw (%rsi) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
|
||||
; BTVER2-NEXT: # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
|
||||
|
@ -9781,9 +9781,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
|
|||
; SLM: # %bb.0:
|
||||
; SLM-NEXT: #APP
|
||||
; SLM-NEXT: popq %rax # sched: [3:1.00]
|
||||
; SLM-NEXT: popq (%rsi) # sched: [1:1.00]
|
||||
; SLM-NEXT: popq (%rsi) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushq %rdi # sched: [1:1.00]
|
||||
; SLM-NEXT: pushq (%rsi) # sched: [1:1.00]
|
||||
; SLM-NEXT: pushq (%rsi) # sched: [4:2.00]
|
||||
; SLM-NEXT: pushq $4095 # imm = 0xFFF
|
||||
; SLM-NEXT: # sched: [1:1.00]
|
||||
; SLM-NEXT: pushq $7 # sched: [1:1.00]
|
||||
|
@ -9859,9 +9859,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
|
|||
; BTVER2: # %bb.0:
|
||||
; BTVER2-NEXT: #APP
|
||||
; BTVER2-NEXT: popq %rax # sched: [5:1.00]
|
||||
; BTVER2-NEXT: popq (%rsi) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: popq (%rsi) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushq %rdi # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushq (%rsi) # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushq (%rsi) # sched: [6:1.00]
|
||||
; BTVER2-NEXT: pushq $4095 # imm = 0xFFF
|
||||
; BTVER2-NEXT: # sched: [1:1.00]
|
||||
; BTVER2-NEXT: pushq $7 # sched: [1:1.00]
|
||||
|
@ -9872,9 +9872,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
|
|||
; ZNVER1: # %bb.0:
|
||||
; ZNVER1-NEXT: #APP
|
||||
; ZNVER1-NEXT: popq %rax # sched: [8:0.50]
|
||||
; ZNVER1-NEXT: popq (%rsi) # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00]
|
||||
; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: pushq (%rsi) # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00]
|
||||
; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF
|
||||
; ZNVER1-NEXT: # sched: [1:0.50]
|
||||
; ZNVER1-NEXT: pushq $7 # sched: [1:0.50]
|
||||
|
|
Loading…
Reference in New Issue