[X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and VSQRT instructions.

A few AVX instructions still had an incorrect number of micro opcodes (NumMicroOps) in the scheduling model.
This patch fixes them.

llvm-svn: 328892
This commit is contained in:
Andrea Di Biagio 2018-03-30 18:53:47 +00:00
parent 1ad23a494b
commit dc97172b2f
2 changed files with 25 additions and 16 deletions
llvm
lib/Target/X86
test/tools/llvm-mca/X86/BtVer2

View File

@ -700,12 +700,14 @@ def : InstRW<[JWriteFCmpYLd, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|I
// Scheduling write for the register-register forms of the VCVTPD2DQ,
// VCVTTPD2DQ and VCVTPD2PS "Y" converts named in the InstRW below.
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
// One entry per resource in the list above, in the same order:
// JFPU1 = 2, JSTC = 2, JFPX = 4.
let ResourceCycles = [2, 2, 4];
// These converts are modelled as three micro opcodes.
let NumMicroOps = 3;
}
// Bind the write above to the register-register convert opcodes.
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
// Scheduling write for the memory-operand forms of the VCVTPD2DQ,
// VCVTTPD2DQ and VCVTPD2PS "Y" converts named in the InstRW below.
// Compared with JWriteVCVTPDY it additionally consumes JLAGU and has a
// latency that is 5 cycles higher (11 vs 6).
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
let Latency = 11;
// One entry per resource in the list above, in the same order:
// JLAGU = 2, JFPU1 = 2, JSTC = 2, JFPX = 4.
let ResourceCycles = [2, 2, 2, 4];
// Same micro opcode count as the register-register form.
let NumMicroOps = 3;
}
// Bind the write above (together with ReadAfterLd) to the memory-operand
// convert opcodes.
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
@ -757,8 +759,9 @@ def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
}
def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
VUNPCKLPDYrr, VUNPCKLPSYrr)>;
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
@ -766,16 +769,20 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let NumMicroOps = 2;
}
def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
VMOVDDUPYrm, VMOVSHDUPYrm,
VMOVSLDUPYrm, VPERMILPDYmi,
VPERMILPSYmi, VSHUFPDYrmi,
VSHUFPSYrmi, VUNPCKHPDYrm,
VUNPCKHPSYrm, VUNPCKLPDYrm,
VUNPCKLPSYrm)>;
// Scheduling write for the memory-operand VBROADCASTSD/VBROADCASTSS "Y"
// opcodes (VBROADCASTSDYrm, VBROADCASTSSYrm) that the following InstRW binds
// to it.
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
// One entry per resource in the list above, in the same order:
// JLAGU = 1, JFPU01 = 2, JFPX = 4.
let ResourceCycles = [1, 2, 4];
// Modelled as two micro opcodes.
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
@ -844,14 +851,16 @@ def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm,
SQRTSDm_Int, VSQRTSDm_Int)>;
// Scheduling write for the register form of the "Y" VSQRTPD opcode
// (VSQRTPDYr), bound by the InstRW below.
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
// NOTE(review): the two Latency lines below look like the removed/added pair
// from the diff view (the second one only adds a comment) — confirm that just
// one of them exists in the actual .td file.
let Latency = 54;
let Latency = 54; // each uOp is 27cy.
// One entry per resource in the list above, in the same order:
// JFPU1 = 2, JFPM = 54.
let ResourceCycles = [2, 54];
// Modelled as two micro opcodes (2 x 27cy = the 54cy latency above).
let NumMicroOps = 2;
}
// Bind the write above to the register-form opcode.
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
// Scheduling write for the memory-operand form of the "Y" VSQRTPD opcode
// (VSQRTPDYm), bound by the InstRW below. Compared with JWriteVSQRTYPD it
// additionally consumes JLAGU and has a latency that is 5 cycles higher.
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
// NOTE(review): the two Latency lines below look like the removed/added pair
// from the diff view (the second one only adds a comment) — confirm that just
// one of them exists in the actual .td file.
let Latency = 59;
let Latency = 59; // each uOp is 27cy (+5cy of memory load).
// One entry per resource in the list above, in the same order:
// JLAGU = 2, JFPU1 = 2, JFPM = 54.
let ResourceCycles = [2, 2, 54];
// Same micro opcode count as the register form.
let NumMicroOps = 2;
}
// Bind the write above to the memory-operand opcode.
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;

View File

@ -1112,12 +1112,12 @@ vzeroupper
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 3 6 2.00 vcvtpd2dq %ymm0, %xmm2
# CHECK-NEXT: 3 11 2.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 3 6 2.00 vcvtpd2ps %ymm0, %xmm2
# CHECK-NEXT: 3 11 2.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
@ -1148,8 +1148,8 @@ vzeroupper
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 3 6 2.00 vcvttpd2dq %ymm0, %xmm2
# CHECK-NEXT: 3 11 2.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
@ -1640,8 +1640,8 @@ vzeroupper
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2
# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2
# CHECK-NEXT: 2 54 54.00 vsqrtpd %ymm0, %ymm2
# CHECK-NEXT: 2 59 54.00 * vsqrtpd (%rax), %ymm2
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2