forked from OSchip/llvm-project
[X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and
VSQRT instructions. There were still a few AVX instructions with an incorrect number of micro opcodes. These should be fixed now. llvm-svn: 328892
This commit is contained in:
parent
1ad23a494b
commit
dc97172b2f
llvm
|
@ -700,12 +700,14 @@ def : InstRW<[JWriteFCmpYLd, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|I
|
|||
// Scheduling write for the register-source instructions VCVTPD2DQYrr,
// VCVTTPD2DQYrr and VCVTPD2PSYrr (attached by the InstRW that follows this def).
// Consumes the JFPU1, JSTC and JFPX resources; the ResourceCycles entries
// below are given in that same order.
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 2, 4];
|
||||
let NumMicroOps = 3; // micro-op count reported for these instructions
|
||||
}
|
||||
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
|
||||
|
||||
// Scheduling write for the memory-source instructions VCVTPD2DQYrm,
// VCVTTPD2DQYrm and VCVTPD2PSYrm (attached, together with ReadAfterLd, by the
// InstRW that follows this def).
// Same resources as the register form plus JLAGU listed first; the
// ResourceCycles entries below are given in the same order as the resources.
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
|
||||
let Latency = 11; // 5 cycles more than the register form (6)
|
||||
let ResourceCycles = [2, 2, 2, 4];
|
||||
let NumMicroOps = 3; // micro-op count reported for these instructions
|
||||
}
|
||||
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
|
||||
|
||||
|
@ -757,8 +759,9 @@ def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
|
|||
}
|
||||
def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
|
||||
VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
|
||||
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
|
||||
VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
|
||||
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
|
||||
VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
|
||||
VUNPCKLPDYrr, VUNPCKLPSYrr)>;
|
||||
|
||||
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
|
@ -766,16 +769,20 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
|||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
|
||||
VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
|
||||
VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
|
||||
VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
|
||||
VMOVDDUPYrm, VMOVSHDUPYrm,
|
||||
VMOVSLDUPYrm, VPERMILPDYmi,
|
||||
VPERMILPSYmi, VSHUFPDYrmi,
|
||||
VSHUFPSYrmi, VUNPCKHPDYrm,
|
||||
VUNPCKHPSYrm, VUNPCKLPDYrm,
|
||||
VUNPCKLPSYrm)>;
|
||||
|
||||
// Scheduling write for VBROADCASTSDYrm and VBROADCASTSSYrm (attached, together
// with ReadAfterLd, by the InstRW that follows this def).
// Consumes JLAGU, JFPU01 and JFPX; the ResourceCycles entries below are given
// in that same order.
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 2, 4];
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
|
||||
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
|
||||
VBROADCASTSSYrm)>;
|
||||
|
||||
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||
let Latency = 6;
|
||||
|
@ -844,14 +851,16 @@ def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm,
|
|||
SQRTSDm_Int, VSQRTSDm_Int)>;
|
||||
|
||||
// Scheduling write for VSQRTPDYr (attached by the InstRW that follows this def).
// Consumes JFPU1 and JFPM; the ResourceCycles entries below are given in that
// same order.
// NOTE(review): the two Latency lines look like the before/after pair of this
// diff hunk; presumably only the commented one is in the committed file - confirm.
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
|
||||
let Latency = 54;
|
||||
let Latency = 54; // each uOp is 27cy.
|
||||
let ResourceCycles = [2, 54];
|
||||
let NumMicroOps = 2; // two uOps, matching the 2 x 27cy latency note above
|
||||
}
|
||||
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
|
||||
|
||||
// Scheduling write for VSQRTPDYm (attached by the InstRW that follows this def).
// Consumes JLAGU, JFPU1 and JFPM; the ResourceCycles entries below are given in
// that same order.
// NOTE(review): the two Latency lines look like the before/after pair of this
// diff hunk; presumably only the commented one is in the committed file - confirm.
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
||||
let Latency = 59;
|
||||
let Latency = 59; // each uOp is 27cy (+5cy of memory load).
|
||||
let ResourceCycles = [2, 2, 54];
|
||||
let NumMicroOps = 2; // two uOps, matching the 2 x 27cy latency note above
|
||||
}
|
||||
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
|
||||
|
||||
|
|
|
@ -1112,12 +1112,12 @@ vzeroupper
|
|||
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 2.00 vcvtpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: 3 11 2.00 * vcvtpd2dqy (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2
|
||||
# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 2.00 vcvtpd2ps %ymm0, %xmm2
|
||||
# CHECK-NEXT: 3 11 2.00 * vcvtpd2psy (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
|
||||
|
@ -1148,8 +1148,8 @@ vzeroupper
|
|||
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
|
||||
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2
|
||||
# CHECK-NEXT: 3 6 2.00 vcvttpd2dq %ymm0, %xmm2
|
||||
# CHECK-NEXT: 3 11 2.00 * vcvttpd2dqy (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
|
||||
|
@ -1640,8 +1640,8 @@ vzeroupper
|
|||
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
|
||||
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
|
||||
# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2
|
||||
# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2
|
||||
# CHECK-NEXT: 2 54 54.00 vsqrtpd %ymm0, %ymm2
|
||||
# CHECK-NEXT: 2 59 54.00 * vsqrtpd (%rax), %ymm2
|
||||
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
|
||||
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
|
||||
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
|
||||
|
|
Loading…
Reference in New Issue