[X86] PUSH/POP 'mem-mem' instructions are not RMW - the load and the store use 2 different addresses

This patch adds a 'WriteCopy' schedule write, defined as the sequence [WriteLoad, WriteStore], and uses it instead of WriteRMW to better model the behaviour.

Found by @andreadb during llvm-mca testing on btver2, which was crashing on instructions whose only schedule write was the "zero uop" WriteRMW.

llvm-svn: 343708
This commit is contained in:
Simon Pilgrim 2018-10-03 19:02:38 +00:00
parent 2016536304
commit aabd99c27a
4 changed files with 25 additions and 24 deletions

View File

@ -1210,12 +1210,12 @@ def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
OpSize32, Requires<[Not64BitMode]>;
} // mayStore, mayLoad, WriteRMW
} // mayStore, mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
@ -1243,7 +1243,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
OpSize16;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
@ -1302,7 +1302,7 @@ def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayLoad, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
OpSize32, Requires<[In64BitMode]>;
let mayStore = 1, SchedRW = [WriteStore] in {
@ -1314,7 +1314,7 @@ def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
} // isCodeGenOnly = 1, ForceDisassemble = 1
} // mayStore, SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
OpSize32, Requires<[In64BitMode]>;
} // mayLoad, mayStore, SchedRW

View File

@ -107,6 +107,7 @@ def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.

View File

@ -1676,9 +1676,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
; SLM-NEXT: popw %ax # sched: [3:1.00]
; SLM-NEXT: popw (%ecx) # sched: [1:1.00]
; SLM-NEXT: popw (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushw %ax # sched: [1:1.00]
; SLM-NEXT: pushw (%ecx) # sched: [1:1.00]
; SLM-NEXT: pushw (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushw $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushw $7 # sched: [1:1.00]
@ -1766,9 +1766,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
; BTVER2-NEXT: popw (%ecx) # sched: [1:1.00]
; BTVER2-NEXT: popw (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushw %ax # sched: [1:1.00]
; BTVER2-NEXT: pushw (%ecx) # sched: [1:1.00]
; BTVER2-NEXT: pushw (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
@ -1828,9 +1828,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
; SLM-NEXT: #APP
; SLM-NEXT: popl %eax # sched: [3:1.00]
; SLM-NEXT: popl (%ecx) # sched: [1:1.00]
; SLM-NEXT: popl (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushl %eax # sched: [1:1.00]
; SLM-NEXT: pushl (%ecx) # sched: [1:1.00]
; SLM-NEXT: pushl (%ecx) # sched: [4:2.00]
; SLM-NEXT: pushl $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushl $7 # sched: [1:1.00]
@ -1918,9 +1918,9 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:1.00]
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popl %eax # sched: [5:1.00]
; BTVER2-NEXT: popl (%ecx) # sched: [1:1.00]
; BTVER2-NEXT: popl (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushl %eax # sched: [1:1.00]
; BTVER2-NEXT: pushl (%ecx) # sched: [1:1.00]
; BTVER2-NEXT: pushl (%ecx) # sched: [6:1.00]
; BTVER2-NEXT: pushl $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushl $7 # sched: [1:1.00]
@ -1933,7 +1933,7 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popl %eax # sched: [8:0.50]
; ZNVER1-NEXT: popl (%ecx) # sched: [1:0.50]
; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00]
; ZNVER1-NEXT: pushl %eax # sched: [1:0.50]
; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50]
; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF

View File

@ -9648,9 +9648,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
; SLM: # %bb.0:
; SLM-NEXT: #APP
; SLM-NEXT: popw %ax # sched: [3:1.00]
; SLM-NEXT: popw (%rsi) # sched: [1:1.00]
; SLM-NEXT: popw (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushw %di # sched: [1:1.00]
; SLM-NEXT: pushw (%rsi) # sched: [1:1.00]
; SLM-NEXT: pushw (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushw $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushw $7 # sched: [1:1.00]
@ -9726,9 +9726,9 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popw %ax # sched: [5:1.00]
; BTVER2-NEXT: popw (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: popw (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushw %di # sched: [1:1.00]
; BTVER2-NEXT: pushw (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: pushw (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
@ -9781,9 +9781,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
; SLM: # %bb.0:
; SLM-NEXT: #APP
; SLM-NEXT: popq %rax # sched: [3:1.00]
; SLM-NEXT: popq (%rsi) # sched: [1:1.00]
; SLM-NEXT: popq (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushq %rdi # sched: [1:1.00]
; SLM-NEXT: pushq (%rsi) # sched: [1:1.00]
; SLM-NEXT: pushq (%rsi) # sched: [4:2.00]
; SLM-NEXT: pushq $4095 # imm = 0xFFF
; SLM-NEXT: # sched: [1:1.00]
; SLM-NEXT: pushq $7 # sched: [1:1.00]
@ -9859,9 +9859,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
; BTVER2-NEXT: popq %rax # sched: [5:1.00]
; BTVER2-NEXT: popq (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: popq (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushq %rdi # sched: [1:1.00]
; BTVER2-NEXT: pushq (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: pushq (%rsi) # sched: [6:1.00]
; BTVER2-NEXT: pushq $4095 # imm = 0xFFF
; BTVER2-NEXT: # sched: [1:1.00]
; BTVER2-NEXT: pushq $7 # sched: [1:1.00]
@ -9872,9 +9872,9 @@ define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popq %rax # sched: [8:0.50]
; ZNVER1-NEXT: popq (%rsi) # sched: [1:0.50]
; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00]
; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50]
; ZNVER1-NEXT: pushq (%rsi) # sched: [1:0.50]
; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00]
; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF
; ZNVER1-NEXT: # sched: [1:0.50]
; ZNVER1-NEXT: pushq $7 # sched: [1:0.50]