forked from OSchip/llvm-project
[X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and VSQRT instructions.
There were still a few AVX instructions with an incorrect number of opcodes. These should be fixed now.
llvm-svn: 328892
This commit is contained in:
parent
1ad23a494b
commit
dc97172b2f
|
@ -700,12 +700,14 @@ def : InstRW<[JWriteFCmpYLd, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|I
|
||||||
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
let ResourceCycles = [2, 2, 4];
|
let ResourceCycles = [2, 2, 4];
|
||||||
|
let NumMicroOps = 3;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
|
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
|
||||||
|
|
||||||
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
|
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
|
||||||
let Latency = 11;
|
let Latency = 11;
|
||||||
let ResourceCycles = [2, 2, 2, 4];
|
let ResourceCycles = [2, 2, 2, 4];
|
||||||
|
let NumMicroOps = 3;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
|
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
|
||||||
|
|
||||||
|
@ -757,8 +759,9 @@ def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> {
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
|
def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
|
||||||
VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
|
VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
|
||||||
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
|
VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
|
||||||
VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
|
VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
|
||||||
|
VUNPCKLPDYrr, VUNPCKLPSYrr)>;
|
||||||
|
|
||||||
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
|
@ -766,16 +769,20 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||||
let NumMicroOps = 2;
|
let NumMicroOps = 2;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
|
def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
|
||||||
VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
|
VMOVDDUPYrm, VMOVSHDUPYrm,
|
||||||
VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
|
VMOVSLDUPYrm, VPERMILPDYmi,
|
||||||
VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
|
VPERMILPSYmi, VSHUFPDYrmi,
|
||||||
|
VSHUFPSYrmi, VUNPCKHPDYrm,
|
||||||
|
VUNPCKHPSYrm, VUNPCKLPDYrm,
|
||||||
|
VUNPCKLPSYrm)>;
|
||||||
|
|
||||||
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
let ResourceCycles = [1, 2, 4];
|
let ResourceCycles = [1, 2, 4];
|
||||||
let NumMicroOps = 2;
|
let NumMicroOps = 2;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
|
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
|
||||||
|
VBROADCASTSSYrm)>;
|
||||||
|
|
||||||
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
|
||||||
let Latency = 6;
|
let Latency = 6;
|
||||||
|
@ -844,14 +851,16 @@ def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm,
|
||||||
SQRTSDm_Int, VSQRTSDm_Int)>;
|
SQRTSDm_Int, VSQRTSDm_Int)>;
|
||||||
|
|
||||||
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
|
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
|
||||||
let Latency = 54;
|
let Latency = 54; // each uOp is 27cy.
|
||||||
let ResourceCycles = [2, 54];
|
let ResourceCycles = [2, 54];
|
||||||
|
let NumMicroOps = 2;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
|
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
|
||||||
|
|
||||||
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
|
||||||
let Latency = 59;
|
let Latency = 59; // each uOp is 27cy (+5cy of memory load).
|
||||||
let ResourceCycles = [2, 2, 54];
|
let ResourceCycles = [2, 2, 54];
|
||||||
|
let NumMicroOps = 2;
|
||||||
}
|
}
|
||||||
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
|
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
|
||||||
|
|
||||||
|
|
|
@ -1112,12 +1112,12 @@ vzeroupper
|
||||||
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
|
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
|
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2
|
# CHECK-NEXT: 3 6 2.00 vcvtpd2dq %ymm0, %xmm2
|
||||||
# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2
|
# CHECK-NEXT: 3 11 2.00 * vcvtpd2dqy (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
|
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2
|
# CHECK-NEXT: 3 6 2.00 vcvtpd2ps %ymm0, %xmm2
|
||||||
# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2
|
# CHECK-NEXT: 3 11 2.00 * vcvtpd2psy (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
|
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
|
||||||
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
|
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
|
||||||
|
@ -1148,8 +1148,8 @@ vzeroupper
|
||||||
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
|
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
|
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2
|
# CHECK-NEXT: 3 6 2.00 vcvttpd2dq %ymm0, %xmm2
|
||||||
# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2
|
# CHECK-NEXT: 3 11 2.00 * vcvttpd2dqy (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
|
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
|
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
|
||||||
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
|
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
|
||||||
|
@ -1640,8 +1640,8 @@ vzeroupper
|
||||||
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
|
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
|
||||||
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
|
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
|
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
|
||||||
# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2
|
# CHECK-NEXT: 2 54 54.00 vsqrtpd %ymm0, %ymm2
|
||||||
# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2
|
# CHECK-NEXT: 2 59 54.00 * vsqrtpd (%rax), %ymm2
|
||||||
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
|
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
|
||||||
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
|
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
|
||||||
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
|
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
|
||||||
|
|
Loading…
Reference in New Issue