From dc97172b2fb7452bdd9e841e4b2d5049fbdd9275 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 30 Mar 2018 18:53:47 +0000 Subject: [PATCH] [X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and VSQRT instructions. There were still a few AVX instructions with an incorrect number of opcodes. These should be fixed now. llvm-svn: 328892 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 25 +++++++++++++------ .../llvm-mca/X86/BtVer2/resources-avx1.s | 16 ++++++------ 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d85c335617f3..f098fceac2ef 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -700,12 +700,14 @@ def : InstRW<[JWriteFCmpYLd, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|I def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> { let Latency = 6; let ResourceCycles = [2, 2, 4]; + let NumMicroOps = 3; } def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>; def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> { let Latency = 11; let ResourceCycles = [2, 2, 2, 4]; + let NumMicroOps = 3; } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; @@ -757,8 +759,9 @@ def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> { } def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri, VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, - VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri, - VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>; + VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, + VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr, + VUNPCKLPDYrr, VUNPCKLPSYrr)>; def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; @@ -766,16 +769,20 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let NumMicroOps = 2; } def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi, - VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm, - VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi, - VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>; + VMOVDDUPYrm, VMOVSHDUPYrm, + VMOVSLDUPYrm, VPERMILPDYmi, + VPERMILPSYmi, VSHUFPDYrmi, + VSHUFPSYrmi, VUNPCKHPDYrm, + VUNPCKHPSYrm, VUNPCKLPDYrm, + VUNPCKLPSYrm)>; def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; let ResourceCycles = [1, 2, 4]; let NumMicroOps = 2; } -def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>; +def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, + VBROADCASTSSYrm)>; def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; @@ -844,14 +851,16 @@ def : InstRW<[JWriteVSQRTPDLd], (instrs SQRTPDm, VSQRTPDm, SQRTSDm_Int, VSQRTSDm_Int)>; def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> { - let Latency = 54; + let Latency = 54; // each uOp is 27cy. let ResourceCycles = [2, 54]; + let NumMicroOps = 2; } def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>; def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { - let Latency = 59; + let Latency = 59; // each uOp is 27cy (+5cy of memory load). let ResourceCycles = [2, 2, 54]; + let NumMicroOps = 2; } def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index 02b73adc21e4..59fa2268972f 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1112,12 +1112,12 @@ vzeroupper # CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2 -# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 vcvtpd2dq %ymm0, %xmm2 +# CHECK-NEXT: 3 11 2.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: 3 11 2.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2 @@ -1148,8 +1148,8 @@ vzeroupper # CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2 -# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 vcvttpd2dq %ymm0, %xmm2 +# CHECK-NEXT: 3 11 2.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2 @@ -1640,8 +1640,8 @@ vzeroupper # CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2 # CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2 -# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2 -# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 2 54 54.00 vsqrtpd %ymm0, %ymm2 +# CHECK-NEXT: 2 59 54.00 * vsqrtpd (%rax), %ymm2 # CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2 # CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2