forked from OSchip/llvm-project
[X86][Btver2] Add correct mul/imul schedule costs
Integer multiply is performed on the JMul functional unit, and i64 multiplies require double pumping. llvm-svn: 327707
This commit is contained in:
parent
8d28ae6aec
commit
23578e7d3c
|
@ -117,7 +117,7 @@ def : WriteRes<WriteRMW, [JSAGU]>;
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
|
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
|
||||||
defm : JWriteResIntPair<WriteIMul, [JALU1], 3>;
|
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
|
||||||
defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
|
defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
|
||||||
|
|
||||||
def : WriteRes<WriteIMulH, [JALU1]> {
|
def : WriteRes<WriteIMulH, [JALU1]> {
|
||||||
|
@ -152,6 +152,19 @@ def JWriteTZCNTLd : SchedWriteRes<[JLAGU, JALU01]> {
|
||||||
def : InstRW<[JWriteTZCNT], (instrs TZCNT16rr, TZCNT32rr, TZCNT64rr)>;
|
def : InstRW<[JWriteTZCNT], (instrs TZCNT16rr, TZCNT32rr, TZCNT64rr)>;
|
||||||
def : InstRW<[JWriteTZCNTLd], (instrs TZCNT16rm, TZCNT32rm, TZCNT64rm)>;
|
def : InstRW<[JWriteTZCNTLd], (instrs TZCNT16rm, TZCNT32rm, TZCNT64rm)>;
|
||||||
|
|
||||||
|
// Scheduling write for the 64-bit multiplies with a register operand
// (bound to MUL64r and IMUL64r by an InstRW in this file). It uses the
// JALU1 and JMul resources; per the commit message, i64 multiplies are
// double pumped through JMul.
def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> {
|
||||||
|
// Twice the latency of 3 that WriteIMul uses for i8/i16/i32 multiplies.
let Latency = 6;
|
||||||
|
// One entry per resource listed above: JALU1 for 1 cycle, JMul for 4.
// The 4 matches the 4.00 throughput figure in the updated test expectations.
let ResourceCycles = [1, 4];
|
||||||
|
let NumMicroOps = 2;
|
||||||
|
}
|
||||||
|
// Scheduling write for the 64-bit multiplies with a memory operand
// (bound to MUL64m and IMUL64m by an InstRW in this file). In addition to
// JALU1 and JMul it lists JLAGU -- presumably the load address-generation
// unit; confirm against the resource definitions elsewhere in the model.
def JWriteIMul64Ld : SchedWriteRes<[JLAGU, JALU1, JMul]> {
|
||||||
|
// 3 cycles more than the register form's latency of 6.
let Latency = 9;
|
||||||
|
// One entry per resource listed above: JLAGU 1 cycle, JALU1 1 cycle,
// JMul 4 cycles.
let ResourceCycles = [1, 1, 4];
|
||||||
|
let NumMicroOps = 2;
|
||||||
|
}
|
||||||
|
// Bind the 64-bit one-operand multiplies to the dedicated i64 writes instead
// of the generic WriteIMul costs. The updated test expectations show
// `imulq %rdi` and `mulq %rcx` moving from [3:1.00] to [6:4.00].
def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>;
|
||||||
|
// Memory-operand forms: the updated test expectations show `imulq (%rsi)`
// and `mulq (%r10)` moving from [6:1.00] to [9:4.00].
def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>;
|
||||||
|
|
||||||
def JWriteIDiv8 : SchedWriteRes<[JALU1, JDiv]> {
|
def JWriteIDiv8 : SchedWriteRes<[JALU1, JDiv]> {
|
||||||
let Latency = 12;
|
let Latency = 12;
|
||||||
let ResourceCycles = [1, 12];
|
let ResourceCycles = [1, 12];
|
||||||
|
|
|
@ -6078,8 +6078,8 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize {
|
||||||
; BTVER2-LABEL: test_imul_64:
|
; BTVER2-LABEL: test_imul_64:
|
||||||
; BTVER2: # %bb.0:
|
; BTVER2: # %bb.0:
|
||||||
; BTVER2-NEXT: #APP
|
; BTVER2-NEXT: #APP
|
||||||
; BTVER2-NEXT: imulq %rdi # sched: [3:1.00]
|
; BTVER2-NEXT: imulq %rdi # sched: [6:4.00]
|
||||||
; BTVER2-NEXT: imulq (%rsi) # sched: [6:1.00]
|
; BTVER2-NEXT: imulq (%rsi) # sched: [9:4.00]
|
||||||
; BTVER2-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
|
; BTVER2-NEXT: imulq %rdi, %rdi # sched: [3:1.00]
|
||||||
; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [6:1.00]
|
; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [6:1.00]
|
||||||
; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
|
; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
|
||||||
|
@ -8093,8 +8093,8 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32
|
||||||
; BTVER2-NEXT: mulw (%r9) # sched: [6:1.00]
|
; BTVER2-NEXT: mulw (%r9) # sched: [6:1.00]
|
||||||
; BTVER2-NEXT: mull %edx # sched: [3:1.00]
|
; BTVER2-NEXT: mull %edx # sched: [3:1.00]
|
||||||
; BTVER2-NEXT: mull (%rax) # sched: [6:1.00]
|
; BTVER2-NEXT: mull (%rax) # sched: [6:1.00]
|
||||||
; BTVER2-NEXT: mulq %rcx # sched: [3:1.00]
|
; BTVER2-NEXT: mulq %rcx # sched: [6:4.00]
|
||||||
; BTVER2-NEXT: mulq (%r10) # sched: [6:1.00]
|
; BTVER2-NEXT: mulq (%r10) # sched: [9:4.00]
|
||||||
; BTVER2-NEXT: #NO_APP
|
; BTVER2-NEXT: #NO_APP
|
||||||
; BTVER2-NEXT: retq # sched: [4:1.00]
|
; BTVER2-NEXT: retq # sched: [4:1.00]
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue