From 323f2e17151a6f3105bf1917f2ca42e168b2ff33 Mon Sep 17 00:00:00 2001 From: Gadi Haber Date: Tue, 24 Oct 2017 20:19:47 +0000 Subject: [PATCH] [X86][Broadwell] Added the instruction scheduling information for the Broadwell CPU. Adding the scheduling information for the Broadwell (BDW) CPU target. This patch adds the instruction scheduling information for the Broadwell (BDW) architecture target by adding the file X86SchedBroadwell.td located under the X86 Target. We used the scheduling information retrieved from the Broadwell architects in order to create the file. The scheduling information includes latency, number of micro-Ops and used ports by each BDW instruction. The patch continues the scheduling replacement and insertion effort started with the SandyBridge (SNB) target in r310792, the Haswell (HSW) target in r311879, the SkylakeClient (SKL) target in rL313613 + rL315978 and the SkylakeServer (SKX) in rL315175. Performance fluctuations may be expected due to code alignment effects. 
Reviewers: zvi, RKSimon, craig.topper Differential Revision: https://reviews.llvm.org/D39054 Change-Id: If6f799e5ff60e1091c8d43b05ea78c53581bae01 llvm-svn: 316492 --- llvm/lib/Target/X86/X86.td | 2 +- llvm/lib/Target/X86/X86SchedBroadwell.td | 4076 ++++++++++++++++++++++ llvm/lib/Target/X86/X86Schedule.td | 2 +- llvm/test/CodeGen/X86/aes-schedule.ll | 24 +- llvm/test/CodeGen/X86/avx-schedule.ll | 364 +- llvm/test/CodeGen/X86/avx2-schedule.ll | 630 ++-- llvm/test/CodeGen/X86/bmi-schedule.ll | 56 +- llvm/test/CodeGen/X86/bmi2-schedule.ll | 60 +- llvm/test/CodeGen/X86/f16c-schedule.ll | 14 +- llvm/test/CodeGen/X86/fma-schedule.ll | 128 +- llvm/test/CodeGen/X86/lea32-schedule.ll | 22 +- llvm/test/CodeGen/X86/lea64-schedule.ll | 22 +- llvm/test/CodeGen/X86/lzcnt-schedule.ll | 12 +- llvm/test/CodeGen/X86/mmx-schedule.ll | 344 +- llvm/test/CodeGen/X86/movbe-schedule.ll | 18 +- llvm/test/CodeGen/X86/popcnt-schedule.ll | 12 +- llvm/test/CodeGen/X86/sse-schedule.ll | 198 +- llvm/test/CodeGen/X86/sse2-schedule.ll | 480 +-- llvm/test/CodeGen/X86/sse3-schedule.ll | 46 +- llvm/test/CodeGen/X86/sse41-schedule.ll | 204 +- llvm/test/CodeGen/X86/sse42-schedule.ll | 46 +- llvm/test/CodeGen/X86/ssse3-schedule.ll | 64 +- 22 files changed, 5450 insertions(+), 1374 deletions(-) create mode 100755 llvm/lib/Target/X86/X86SchedBroadwell.td diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 03b5a6064c9f..1a13ac2206a1 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -576,7 +576,7 @@ def BDWFeatures : ProcessorFeatures; -class BroadwellProc : ProcModel : ProcModel; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td new file mode 100755 index 000000000000..c70af22d060c --- /dev/null +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -0,0 +1,4076 @@ +//=- X86SchedBroadwell.td - X86 Broadwell Scheduling ---------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Broadwell to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// +def BroadwellModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and HW can decode 4 + // instructions per cycle. + let IssueWidth = 4; + let MicroOpBufferSize = 192; // Based on the reorder buffer. + let LoadLatency = 5; // Cycles; loads are modeled as 5 cycles throughout this file. + let MispredictPenalty = 16; + + // Based on the LSD (loop-stream detector) queue size and benchmarking data. + let LoopMicroOpBufferSize = 50; + + // CompleteModel is left at 0 so that the scheduler can assign a default + // model to unrecognized opcodes. + let CompleteModel = 0; +} + +let SchedModel = BroadwellModel in { + +// Broadwell can issue micro-ops to 8 different ports in one cycle. + +// Ports 0, 1, 5, and 6 handle all computation. +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. Port 7 can handle address calculations. +def BWPort0 : ProcResource<1>; +def BWPort1 : ProcResource<1>; +def BWPort2 : ProcResource<1>; +def BWPort3 : ProcResource<1>; +def BWPort4 : ProcResource<1>; +def BWPort5 : ProcResource<1>; +def BWPort6 : ProcResource<1>; +def BWPort7 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. 
+def BWPort01 : ProcResGroup<[BWPort0, BWPort1]>; +def BWPort23 : ProcResGroup<[BWPort2, BWPort3]>; +def BWPort237 : ProcResGroup<[BWPort2, BWPort3, BWPort7]>; +def BWPort04 : ProcResGroup<[BWPort0, BWPort4]>; +def BWPort05 : ProcResGroup<[BWPort0, BWPort5]>; +def BWPort06 : ProcResGroup<[BWPort0, BWPort6]>; +def BWPort15 : ProcResGroup<[BWPort1, BWPort5]>; +def BWPort16 : ProcResGroup<[BWPort1, BWPort6]>; +def BWPort56 : ProcResGroup<[BWPort5, BWPort6]>; +def BWPort015 : ProcResGroup<[BWPort0, BWPort1, BWPort5]>; +def BWPort056 : ProcResGroup<[BWPort0, BWPort5, BWPort6]>; +def BWPort0156: ProcResGroup<[BWPort0, BWPort1, BWPort5, BWPort6]>; + +// 60 Entry Unified Scheduler +def BWPortAny : ProcResGroup<[BWPort0, BWPort1, BWPort2, BWPort3, BWPort4, + BWPort5, BWPort6, BWPort7]> { + let BufferSize=60; +} + +// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 +// cycles after the memory operand. +def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass BWWriteResPair { + // Register variant is using a single cycle on ExePort. + def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the + // latency. + def : WriteRes { + let Latency = !add(Lat, 5); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes; + +// Arithmetic. +defm : BWWriteResPair; // Simple integer ALU op. +defm : BWWriteResPair; // Integer multiplication. +def : WriteRes { let Latency = 3; } // Integer multiplication, high part. +def BWDivider : ProcResource<1>; // Integer division issued on port 0. +def : WriteRes { // Integer division. 
+ let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +def : WriteRes; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm : BWWriteResPair; + +// Loads, stores, and moves, not folded with other operations. +def : WriteRes { let Latency = 5; } +def : WriteRes; +def : WriteRes; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def : WriteRes; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm : BWWriteResPair; + +// Floating point. This covers both scalar and vector operations. +defm : BWWriteResPair; // Floating point add/sub/compare. +defm : BWWriteResPair; // Floating point multiplication. +defm : BWWriteResPair; // 10-14 cycles. // Floating point division. +defm : BWWriteResPair; // Floating point square root. +defm : BWWriteResPair; // Floating point reciprocal estimate. +defm : BWWriteResPair; // Floating point reciprocal square root estimate. +// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : BWWriteResPair; // Floating point vector shuffles. +defm : BWWriteResPair; // Floating point vector blends. +def : WriteRes { // Fp vector variable blends. + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// FMA Scheduling helper class. +// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm : BWWriteResPair; // Vector integer ALU op, no logicals. +defm : BWWriteResPair; // Vector integer shifts. +defm : BWWriteResPair; // Vector integer multiply. +defm : BWWriteResPair; // Vector shuffles. +defm : BWWriteResPair; // Vector blends. + +def : WriteRes { // Vector variable blends. 
+ let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +def : WriteRes { // Vector MPSAD. + let Latency = 6; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1, 2]; +} + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm : BWWriteResPair; // Vector and/or/xor. + +// Conversion between integer and float. +defm : BWWriteResPair; // Float -> Integer. +defm : BWWriteResPair; // Integer -> Float. +defm : BWWriteResPair; // Float -> Float size conversion. + +// String instructions. +// Packed Compare Implicit Length Strings, Return Mask +// String instructions. +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Mask +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 2, 4]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [6, 2, 1]; +} + // Packed Compare Implicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [6, 2]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 2, 2, 1]; +} + +// AES instructions. +def : WriteRes { // Decryption, encryption. + let Latency = 7; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} +def : WriteRes { // InvMixColumn. + let Latency = 14; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 14; + let ResourceCycles = [2, 1]; +} +def : WriteRes { // Key Generation. 
+ let Latency = 10; + let ResourceCycles = [2, 8]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [2, 7, 1]; +} + +// Carry-less multiplication instructions. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1, 1]; +} + +// Catch-all for expensive system instructions. +def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; + +// AVX2. +defm : BWWriteResPair; // Fp 256-bit width vector shuffles. +defm : BWWriteResPair; // 256-bit width vector shuffles. +def : WriteRes { // Variable vector shifts. + let Latency = 2; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1, 1]; +} + +// Old microcoded instructions that nobody uses. +def : WriteRes { let Latency = 100; } // def WriteMicrocoded : SchedWrite; + +// Fence instructions. +def : WriteRes; + +// Nop, not very useful except it provides a model for nops! +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + +// Remaining instrs. 
+ +def BWWriteResGroup1 : SchedWriteRes<[BWPort0]> { // One micro-op on port 0, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64grr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRADrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRAWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MOVPDI2DIrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MOVPQIto64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[BWWriteResGroup1], (instregex 
"VMOVPQIto64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLDYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLQYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRADYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRAWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLDYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLQYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLVQYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLVQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPDYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPSYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPSrr")>; + +def BWWriteResGroup2 : SchedWriteRes<[BWPort1]> { // One micro-op on port 1, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup2], (instregex "COMP_FST0r")>; +def: InstRW<[BWWriteResGroup2], (instregex "COM_FST0r")>; +def: InstRW<[BWWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[BWWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; // NOTE(review): duplicate of the previous entry - confirm whether another opcode was intended. +def: InstRW<[BWWriteResGroup2], (instregex "UCOM_FPr")>; +def: InstRW<[BWWriteResGroup2], (instregex "UCOM_Fr")>; +def: InstRW<[BWWriteResGroup2], (instregex "VMASKMOVDQU")>; + +def 
BWWriteResGroup3 : SchedWriteRes<[BWPort5]> { // One micro-op on port 5, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup3], (instregex "ANDNPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDNPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "INSERTPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PSHUFWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOV64toPQIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPDrr(_REV)?")>; // "(_REV)?" makes the whole suffix optional so the plain rr form matches too. +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVDDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVDI2PDIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVHLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVLHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSHDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSLDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPSrr(_REV)?")>; 
+def: InstRW<[BWWriteResGroup3], (instregex "ORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKSSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKSSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKUSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKUSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PALIGNRrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PBLENDWrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFHWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFLWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSLLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSRLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLQDQrr")>; +def: 
InstRW<[BWWriteResGroup3], (instregex "PUNPCKLWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "SHUFPDrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "SHUFPSrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKHPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKLPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VBROADCASTSSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VINSERTPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOV64toPQIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDI2PDIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVHLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVLHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex 
"VMOVSSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSDWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSWBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSDWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSWBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPALIGNRYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPALIGNRrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBLENDWYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBLENDWrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBROADCASTDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBROADCASTQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBQrr")>; 
+def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFLWYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex 
"VPUNPCKLDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "XORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "XORPSrr")>; + +def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> { // One micro-op on port 6, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup4], (instregex "JMP(16|32|64)r")>; + +def BWWriteResGroup5 : SchedWriteRes<[BWPort01]> { // One micro-op on the port 0/1 group, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup5], (instregex "FINCSTP")>; +def: InstRW<[BWWriteResGroup5], (instregex "FNOP")>; + +def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> { // One micro-op on the port 0/6 group, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: 
InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)rr(_REV)?")>; // "(_REV)?" makes the whole suffix optional so the plain rr form matches too. +def: InstRW<[BWWriteResGroup6], (instregex "ADC8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADCX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADCX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADOX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADOX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTC(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTC(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTR(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CDQ")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVAE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVB(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVG(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVGE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVL(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVLE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNO(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNP(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVO(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVP(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CQO")>; 
+def: InstRW<[BWWriteResGroup6], (instregex "JAE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JAE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JA_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JA_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JBE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JBE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JB_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JB_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JGE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JGE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JG_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JG_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JLE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JLE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JL_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JL_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JMP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JMP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNO_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNO_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNS_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNS_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JO_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JO_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JS_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JS_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "RORX32ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "RORX64ri")>; +def: 
InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SARX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SARX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETAEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETBr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETGEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETGr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETLEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETLr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNOr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNPr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNSr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETOr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETPr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETSr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHLX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHLX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHRX32rr")>; +def: 
InstRW<[BWWriteResGroup6], (instregex "SHRX64rr")>; + +def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { // One micro-op on the port 1/5 group, 1-cycle latency. + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup7], (instregex "ANDN32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "ANDN64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSI32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSI64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSR32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSR64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BZHI32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BZHI64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "LEA(16|32|64)r")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSBrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSDrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSWrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDQirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDUSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDUSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PAVGBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PAVGWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPGTBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPGTDirr")>; +def: InstRW<[BWWriteResGroup7], 
(instregex "MMX_PCMPGTWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMAXSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMAXUBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMINSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMINUBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBQirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBUSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBUSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PAVGBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PAVGWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQWrr")>; +def: InstRW<[BWWriteResGroup7], 
(instregex "PCMPGTBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPGTDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPGTWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNBrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNDrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNWrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex 
"VPADDDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDQYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQQYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSBrr")>; +def: 
InstRW<[BWWriteResGroup7], (instregex "VPMAXSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNBYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNBrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNDYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNDrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNWYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNWrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBQYrr")>; +def: 
InstRW<[BWWriteResGroup7], (instregex "VPSUBQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBWrr")>; + +def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup8], (instregex "BLENDPDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "BLENDPSrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr(_REV)?")>; /* (_REV)? makes the whole _REV suffix optional; (_REV?) demanded a literal "_RE" and so skipped the plain rr opcode. */ +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDNirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PORirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PXORirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQArr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQUrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVPQI2QIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PANDNrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PANDrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PXORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSrri")>; +def: InstRW<[BWWriteResGroup8], (instregex 
"VMOVDQAYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQArr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDNYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDNrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDDYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPORYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPXORYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPXORrr")>; + +def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "CBW")>; +def: InstRW<[BWWriteResGroup9], (instregex "CLC")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMC")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)rr(_REV)?")>; +def: 
InstRW<[BWWriteResGroup9], (instregex "CMP8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8rr(_REV)?")>; /* (_REV)? makes the whole _REV suffix optional; (_REV?) demanded a literal "_RE" and so skipped the plain rr opcode. */ +def: InstRW<[BWWriteResGroup9], (instregex "CWDE")>; +def: InstRW<[BWWriteResGroup9], (instregex "DEC(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "DEC8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "INC(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "INC8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "LAHF")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri_alt")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr16")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr32")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr8")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVZX(16|32|64)rr16")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVZX(16|32|64)rr8")>; +def: InstRW<[BWWriteResGroup9], (instregex "NEG(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NEG8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOOP")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOT(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOT8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "SAHF")>; +def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SIDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SLDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SMSW16m")>; +def: 
InstRW<[BWWriteResGroup9], (instregex "STC")>; +def: InstRW<[BWWriteResGroup9], (instregex "STRm")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)rr(_REV)?")>; /* (_REV)? makes the whole _REV suffix optional; (_REV?) demanded a literal "_RE" and so skipped the plain rr opcode. */ +def: InstRW<[BWWriteResGroup9], (instregex "SUB8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "SYSCALL")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "XCHG(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8rr(_REV)?")>; + +def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVD64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVNTQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVQ64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV8mi")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV8mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVAPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVAPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVDQAmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVDQUmr")>; +def: InstRW<[BWWriteResGroup10], 
(instregex "MOVHPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVHPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVLPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVLPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTDQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTI_64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPDI2DImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPQI2QImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPQIto64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVSSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVUPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVUPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP32m")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP64m")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP80m")>; +def: InstRW<[BWWriteResGroup10], (instregex "VEXTRACTF128mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VEXTRACTI128mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQAYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQAmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQUYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQUmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVHPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVHPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVLPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVLPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTDQYmr")>; +def: InstRW<[BWWriteResGroup10], 
(instregex "VMOVNTDQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVPDI2DImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVPQI2QImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVPQIto64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVSDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVSSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPSmr")>; + +def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPDrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPSrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWirri")>; +def: InstRW<[BWWriteResGroup11], (instregex "PBLENDVBrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRQrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRWrri")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPDYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPSYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPSrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPBLENDVBYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPBLENDVBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRQrr")>; +def: 
InstRW<[BWWriteResGroup11], (instregex "VPINSRWrri")>; + +def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup12], (instregex "FDECSTP")>; + +def BWWriteResGroup13 : SchedWriteRes<[BWPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup13], (instregex "ROL(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL8r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL8ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR8r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR8ri")>; + +def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup14], (instregex "LFENCE")>; +def: InstRW<[BWWriteResGroup14], (instregex "MFENCE")>; +def: InstRW<[BWWriteResGroup14], (instregex "WAIT")>; +def: InstRW<[BWWriteResGroup14], (instregex "XGETBV")>; + +def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup15], (instregex "CVTPS2PDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "CVTSS2SDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "EXTRACTPSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRBrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRWri")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRWrr_REV")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSLLDrr")>; +def: InstRW<[BWWriteResGroup15], 
(instregex "PSLLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSLLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRADrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRAWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PTESTrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPS2PDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTSS2SDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VEXTRACTPSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRBrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRWri")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRWrr_REV")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRADrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRAWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPTESTrr")>; + +def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup16], (instregex "CLFLUSH")>; + +def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup17], (instregex "MMX_MOVDQ2Qrr")>; + +def 
BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup18], (instregex "SFENCE")>; + +def BWWriteResGroup19 : SchedWriteRes<[BWPort06,BWPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup19], (instregex "BEXTR32rr")>; +def: InstRW<[BWWriteResGroup19], (instregex "BEXTR64rr")>; +def: InstRW<[BWWriteResGroup19], (instregex "BSWAP(16|32|64)r")>; + +def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8")>; +def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri")>; +def: InstRW<[BWWriteResGroup20], (instregex "CMOVA(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup20], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup20], (instregex "CWD")>; +def: InstRW<[BWWriteResGroup20], (instregex "JRCXZ")>; +def: InstRW<[BWWriteResGroup20], (instregex "SBB8i8")>; +def: InstRW<[BWWriteResGroup20], (instregex "SBB8ri")>; +def: InstRW<[BWWriteResGroup20], (instregex "SETAr")>; +def: InstRW<[BWWriteResGroup20], (instregex "SETBEr")>; + +def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup21], (instregex "EXTRACTPSmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRBmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRDmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRQmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRWmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "STMXCSR")>; +def: InstRW<[BWWriteResGroup21], (instregex "VEXTRACTPSmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRBmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRDmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRQmr")>; 
+def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRWmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VSTMXCSR")>; + +def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup22], (instregex "FNSTCW16m")>; + +def BWWriteResGroup23 : SchedWriteRes<[BWPort4,BWPort237,BWPort06]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup23], (instregex "SETAEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETBm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETGEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETGm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETLEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETLm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNOm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNPm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNSm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETOm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETPm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETSm")>; + +def BWWriteResGroup24 : SchedWriteRes<[BWPort4,BWPort237,BWPort15]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup24], (instregex "MOVBE(16|32|64)mr")>; + +def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)r")>; +def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup25], (instregex "PUSH64i8")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSB")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSL")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSQ")>; 
+def: InstRW<[BWWriteResGroup25], (instregex "STOSW")>; + +def BWWriteResGroup26 : SchedWriteRes<[BWPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup26], (instregex "MOVMSKPDrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "MOVMSKPSrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "PMOVMSKBrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPDrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPSrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VPMOVMSKBrr")>; + +def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup27], (instregex "ADDPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSUBPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "BSF(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "BSR(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPPDrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPPSrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "COMISDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "COMISSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CVTDQ2PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CVTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex 
"CVTTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "IMUL(32|64)rr(i8?)")>; +def: InstRW<[BWWriteResGroup27], (instregex "IMUL8r")>; +def: InstRW<[BWWriteResGroup27], (instregex "LZCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MUL8r")>; +def: InstRW<[BWWriteResGroup27], (instregex "PDEP32rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PDEP64rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PEXT32rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PEXT64rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "POPCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SHLD(16|32|64)rri8")>; +def: InstRW<[BWWriteResGroup27], (instregex "SHRD(16|32|64)rri8")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "TZCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "UCOMISDrr")>; +def: InstRW<[BWWriteResGroup27], 
(instregex "UCOMISSrr")>;
+// V-prefixed patterns of BWWriteResGroup27, gathered into one InstRW.
+def: InstRW<[BWWriteResGroup27], (instregex
+    "VADDPDYrr", "VADDPDrr", "VADDPSYrr", "VADDPSrr", "VADDSDrr", "VADDSSrr",
+    "VADDSUBPDYrr", "VADDSUBPDrr", "VADDSUBPSYrr", "VADDSUBPSrr",
+    "VCMPPDYrri", "VCMPPDrri", "VCMPPSYrri", "VCMPPSrri",
+    "VCMPSDrr", "VCMPSSrr", "VCOMISDrr", "VCOMISSrr",
+    "VCVTDQ2PSYrr", "VCVTDQ2PSrr", "VCVTPS2DQYrr", "VCVTPS2DQrr",
+    "VCVTTPS2DQYrr", "VCVTTPS2DQrr",
+    "VMAXPDYrr", "VMAXPDrr", "VMAXPSYrr", "VMAXPSrr", "VMAXSDrr", "VMAXSSrr",
+    "VMINPDYrr", "VMINPDrr")>;
+def: InstRW<[BWWriteResGroup27],
(instregex "VMINPSYrr")>;
+def: InstRW<[BWWriteResGroup27], (instregex
+    "VMINPSrr", "VMINSDrr", "VMINSSrr",
+    "VSUBPDYrr", "VSUBPDrr", "VSUBPSYrr", "VSUBPSrr", "VSUBSDrr", "VSUBSSrr",
+    "VUCOMISDrr", "VUCOMISSrr")>;
+
+// Latency 3, 2 micro-ops; ResourceCycles 1 on each of BWPort1 and BWPort0156.
+// The pattern requires an "i" (optionally "i8") after "IMUL16rr".
+def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup27_16], (instregex "IMUL16rr(i8?)")>;
+
+// Latency 3, 1 micro-op; ResourceCycles 1 on BWPort5.
+def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup28], (instregex
+    "VBROADCASTSDYrr", "VBROADCASTSSYrr",
+    "VEXTRACTF128rr", "VEXTRACTI128rr", "VINSERTF128rr", "VINSERTI128rr",
+    "VPBROADCASTBYrr", "VPBROADCASTBrr", "VPBROADCASTDYrr", "VPBROADCASTQYrr",
+    "VPBROADCASTWYrr", "VPBROADCASTWrr",
+    "VPERM2F128rr", "VPERM2I128rr", "VPERMDYrr")>;
+def:
InstRW<[BWWriteResGroup28], (instregex "VPERMPDYri")>;
+def: InstRW<[BWWriteResGroup28], (instregex
+    "VPERMPSYrr", "VPERMQYri",
+    "VPMOVSXBDYrr", "VPMOVSXBQYrr", "VPMOVSXBWYrr",
+    "VPMOVSXDQYrr", "VPMOVSXWDYrr", "VPMOVSXWQYrr",
+    "VPMOVZXBDYrr", "VPMOVZXBQYrr", "VPMOVZXBWYrr",
+    "VPMOVZXDQYrr", "VPMOVZXWDYrr", "VPMOVZXWQYrr")>;
+
+// Latency 3, 1 micro-op; ResourceCycles 1 on BWPort01.
+def BWWriteResGroup29 : SchedWriteRes<[BWPort01]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup29], (instregex
+    "MULPDrr", "MULPSrr", "MULSDrr", "MULSSrr",
+    "VMULPDYrr", "VMULPDrr", "VMULPSYrr", "VMULPSrr",
+    "VMULSDrr", "VMULSSrr")>;
+
+// Latency 3, 3 micro-ops; ResourceCycles 3 on BWPort0156.
+def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[BWWriteResGroup30], (instregex
+    "XADD(16|32|64)rr", "XADD8rr", "XCHG8rr")>;
+
+def BWWriteResGroup31 :
SchedWriteRes<[BWPort0,BWPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+// BWWriteResGroup31 patterns (group body above: latency 3, 3 micro-ops).
+def: InstRW<[BWWriteResGroup31], (instregex
+    "VPSLLVDYrr", "VPSLLVDrr", "VPSRAVDYrr", "VPSRAVDrr",
+    "VPSRLVDYrr", "VPSRLVDrr")>;
+
+// Latency 3, 3 micro-ops; ResourceCycles 2 on BWPort5 and 1 on BWPort15.
+def BWWriteResGroup32 : SchedWriteRes<[BWPort5,BWPort15]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup32], (instregex
+    "MMX_PHADDSWrr64", "MMX_PHADDWrr64", "MMX_PHADDrr64",
+    "MMX_PHSUBDrr64", "MMX_PHSUBSWrr64", "MMX_PHSUBWrr64",
+    "PHADDDrr", "PHADDSWrr128", "PHADDWrr",
+    "PHSUBDrr", "PHSUBSWrr128", "PHSUBWrr",
+    "VPHADDDYrr", "VPHADDDrr", "VPHADDSWrr128", "VPHADDSWrr256",
+    "VPHADDWYrr", "VPHADDWrr",
+    "VPHSUBDYrr", "VPHSUBDrr", "VPHSUBSWrr128", "VPHSUBSWrr256")>;
+def:
InstRW<[BWWriteResGroup32], (instregex "VPHSUBWYrr")>;
+def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBWrr")>;
+
+// Latency 3, 3 micro-ops; ResourceCycles 2 on BWPort5 and 1 on BWPort0156.
+def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup33], (instregex
+    "MMX_PACKSSDWirr", "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
+
+// Latency 3, 3 micro-ops; ResourceCycles 1 on BWPort6 and 2 on BWPort0156.
+def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[BWWriteResGroup34], (instregex "CLD")>;
+
+// Latency 3, 3 micro-ops; ResourceCycles 1 on BWPort06 and 2 on BWPort0156.
+def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[BWWriteResGroup35], (instregex
+    "RCL(16|32|64)r1", "RCL(16|32|64)ri", "RCL8r1", "RCL8ri",
+    "RCR(16|32|64)r1", "RCR(16|32|64)ri", "RCR8r1", "RCR8ri")>;
+
+// Same resources as BWWriteResGroup35 but ResourceCycles are [2,1].
+def BWWriteResGroup36 : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup36], (instregex
+    "ROL(16|32|64)rCL", "ROL8rCL", "ROR(16|32|64)rCL", "ROR8rCL",
+    "SAR(16|32|64)rCL", "SAR8rCL", "SHL(16|32|64)rCL", "SHL8rCL",
+    "SHR(16|32|64)rCL")>;
+def:
InstRW<[BWWriteResGroup36], (instregex "SHR8rCL")>;
+
+// Latency 3, 4 micro-ops; ResourceCycles 1 on each of the four listed ports.
+def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup37], (instregex "CALL(16|32|64)r")>;
+
+// Latency 3, 4 micro-ops; like BWWriteResGroup37 but with BWPort06 in place
+// of BWPort6.
+def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup38], (instregex
+    "CALL64pcrel32", "SETAm", "SETBEm")>;
+
+// Latency 4, 2 micro-ops; ResourceCycles 1 on each of BWPort0 and BWPort1.
+def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup39], (instregex
+    "CVTSD2SI64rr", "CVTSD2SIrr", "CVTSS2SI64rr", "CVTSS2SIrr",
+    "CVTTSD2SI64rr", "CVTTSD2SIrr", "CVTTSS2SI64rr", "CVTTSS2SIrr",
+    "VCVTSD2SI64rr", "VCVTSD2SIrr", "VCVTSS2SI64rr", "VCVTSS2SIrr",
+    "VCVTTSD2SI64rr", "VCVTTSD2SIrr", "VCVTTSS2SI64rr", "VCVTTSS2SIrr")>;
+
+// Latency 4, 2 micro-ops; ResourceCycles 1 on each of BWPort0 and BWPort5.
+def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr")>;
+def: InstRW<[BWWriteResGroup40],
(instregex "VPSLLDYrr")>;
+def: InstRW<[BWWriteResGroup40], (instregex
+    "VPSLLQYrr", "VPSLLWYrr", "VPSRADYrr", "VPSRAWYrr",
+    "VPSRLDYrr", "VPSRLQYrr", "VPSRLWYrr", "VPTESTYrr")>;
+
+// Latency 4, 2 micro-ops; ResourceCycles 1 on each of BWPort0 and BWPort0156.
+def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup41], (instregex "FNSTSW16r")>;
+
+// Latency 4, 2 micro-ops; ResourceCycles 1 on each of BWPort1 and BWPort5.
+def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup42], (instregex
+    "CVTDQ2PDrr", "CVTPD2DQrr", "CVTPD2PSrr", "CVTSD2SSrr",
+    "CVTSI2SD64rr", "CVTSI2SDrr", "CVTSI2SSrr", "CVTTPD2DQrr",
+    "IMUL(32|64)r",
+    "MMX_CVTPD2PIirr", "MMX_CVTPI2PDirr", "MMX_CVTPS2PIirr",
+    "MMX_CVTTPD2PIirr", "MMX_CVTTPS2PIirr",
+    "MUL(32|64)r", "MULX64rr",
+    "VCVTDQ2PDrr", "VCVTPD2DQrr")>;
+def:
InstRW<[BWWriteResGroup42], (instregex "VCVTPD2PSrr")>;
+def: InstRW<[BWWriteResGroup42], (instregex
+    "VCVTPS2PHrr", "VCVTSD2SSrr", "VCVTSI2SD64rr", "VCVTSI2SDrr",
+    "VCVTSI2SSrr", "VCVTTPD2DQrr")>;
+
+// Latency 4, 4 micro-ops over BWPort1, BWPort06 and BWPort0156.
+// ResourceCycles was previously omitted here, leaving three listed resources
+// to account for four micro-ops; every other group in this file spells the
+// cycles out so that they sum to NumMicroOps.
+// NOTE(review): the extra cycle is assigned to BWPort0156 - confirm against
+// the Broadwell port data.
+def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[BWWriteResGroup42_16], (instregex "IMUL16r", "MUL16r")>;
+
+// Latency 4, 3 micro-ops; ResourceCycles 1 on each of BWPort0, BWPort4 and
+// BWPort237.
+def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup43], (instregex "FNSTSWm")>;
+
+// Latency 4, 3 micro-ops; as BWWriteResGroup43 but with BWPort1 in place of
+// BWPort0.
+def BWWriteResGroup44 : SchedWriteRes<[BWPort1,BWPort4,BWPort237]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup44], (instregex
+    "ISTT_FP16m", "ISTT_FP32m", "ISTT_FP64m",
+    "IST_F16m", "IST_F32m", "IST_FP16m", "IST_FP32m", "IST_FP64m",
+    "VCVTPS2PHYmr", "VCVTPS2PHmr")>;
+
+// Latency 4, 4 micro-ops; ResourceCycles 4 on BWPort0156.
+def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+  let ResourceCycles = [4];
+}
+def: InstRW<[BWWriteResGroup45], (instregex "FNCLEX")>;
+
+// Latency 4, 4 micro-ops; ResourceCycles 1 on BWPort015 and 3 on BWPort0156.
+def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> {
+  let Latency = 4;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[BWWriteResGroup46],
(instregex "VZEROUPPER")>;
+
+// Latency 5, 1 micro-op; ResourceCycles 1 on BWPort0.
+def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup47], (instregex
+    "MMX_PMADDUBSWrr64", "MMX_PMADDWDirr", "MMX_PMULHRSWrr64",
+    "MMX_PMULHUWirr", "MMX_PMULHWirr", "MMX_PMULLWirr",
+    "MMX_PMULUDQirr", "MMX_PSADBWirr",
+    "MUL_FPrST0", "MUL_FST0r", "MUL_FrST0",
+    "PCLMULQDQrr", "PCMPGTQrr", "PHMINPOSUWrr128",
+    "PMADDUBSWrr", "PMADDWDrr", "PMULDQrr", "PMULHRSWrr",
+    "PMULHUWrr", "PMULHWrr", "PMULLWrr", "PMULUDQrr", "PSADBWrr",
+    "RCPPSr", "RCPSSr", "RSQRTPSr", "RSQRTSSr",
+    "VPCLMULQDQrr", "VPCMPGTQYrr", "VPCMPGTQrr", "VPHMINPOSUWrr128",
+    "VPMADDUBSWYrr", "VPMADDUBSWrr", "VPMADDWDYrr", "VPMADDWDrr",
+    "VPMULDQYrr", "VPMULDQrr", "VPMULHRSWYrr", "VPMULHRSWrr",
+    "VPMULHUWYrr", "VPMULHUWrr", "VPMULHWYrr", "VPMULHWrr",
+    "VPMULLWYrr", "VPMULLWrr", "VPMULUDQYrr", "VPMULUDQrr",
+    "VPSADBWYrr", "VPSADBWrr",
+    "VRCPPSr", "VRCPSSr", "VRSQRTPSr", "VRSQRTSSr")>;
+
+// Latency 5, 1 micro-op; ResourceCycles 1 on BWPort01.
+def BWWriteResGroup48 : SchedWriteRes<[BWPort01]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup48], (instregex
+    "VFMADD132PDYr", "VFMADD132PDr", "VFMADD132PSYr", "VFMADD132PSr",
+    "VFMADD132SDr", "VFMADD132SSr", "VFMADD213PDYr")>;
+// BWWriteResGroup48 continued: VFMADD/VFMADDSUB/VFMSUB/VFMSUBADD/VFNMADD
+// register-form patterns, gathered into one multi-pattern InstRW.
+def: InstRW<[BWWriteResGroup48], (instregex
+    "VFMADD213PDr", "VFMADD213PSYr", "VFMADD213PSr",
+    "VFMADD213SDr", "VFMADD213SSr",
+    "VFMADD231PDYr", "VFMADD231PDr", "VFMADD231PSYr", "VFMADD231PSr",
+    "VFMADD231SDr", "VFMADD231SSr",
+    "VFMADDSUB132PDYr", "VFMADDSUB132PDr",
+    "VFMADDSUB132PSYr", "VFMADDSUB132PSr",
+    "VFMADDSUB213PDYr", "VFMADDSUB213PDr",
+    "VFMADDSUB213PSYr", "VFMADDSUB213PSr",
+    "VFMADDSUB231PDYr", "VFMADDSUB231PDr",
+    "VFMADDSUB231PSYr", "VFMADDSUB231PSr",
+    "VFMSUB132PDYr", "VFMSUB132PDr", "VFMSUB132PSYr", "VFMSUB132PSr",
+    "VFMSUB132SDr", "VFMSUB132SSr",
+    "VFMSUB213PDYr", "VFMSUB213PDr", "VFMSUB213PSYr", "VFMSUB213PSr",
+    "VFMSUB213SDr", "VFMSUB213SSr",
+    "VFMSUB231PDYr", "VFMSUB231PDr", "VFMSUB231PSYr", "VFMSUB231PSr",
+    "VFMSUB231SDr", "VFMSUB231SSr",
+    "VFMSUBADD132PDYr", "VFMSUBADD132PDr",
+    "VFMSUBADD132PSYr", "VFMSUBADD132PSr",
+    "VFMSUBADD213PDYr", "VFMSUBADD213PDr",
+    "VFMSUBADD213PSYr", "VFMSUBADD213PSr",
+    "VFMSUBADD231PDYr", "VFMSUBADD231PDr",
+    "VFMSUBADD231PSYr", "VFMSUBADD231PSr",
+    "VFNMADD132PDYr", "VFNMADD132PDr", "VFNMADD132PSYr", "VFNMADD132PSr",
+    "VFNMADD132SDr", "VFNMADD132SSr",
+    "VFNMADD213PDYr", "VFNMADD213PDr")>;
+def: InstRW<[BWWriteResGroup48],
(instregex "VFNMADD213PSYr")>;
+def: InstRW<[BWWriteResGroup48], (instregex
+    "VFNMADD213PSr", "VFNMADD213SDr", "VFNMADD213SSr",
+    "VFNMADD231PDYr", "VFNMADD231PDr", "VFNMADD231PSYr", "VFNMADD231PSr",
+    "VFNMADD231SDr", "VFNMADD231SSr",
+    "VFNMSUB132PDYr", "VFNMSUB132PDr", "VFNMSUB132PSYr", "VFNMSUB132PSr",
+    "VFNMSUB132SDr", "VFNMSUB132SSr",
+    "VFNMSUB213PDYr", "VFNMSUB213PDr", "VFNMSUB213PSYr", "VFNMSUB213PSr",
+    "VFNMSUB213SDr", "VFNMSUB213SSr",
+    "VFNMSUB231PDYr", "VFNMSUB231PDr", "VFNMSUB231PSYr", "VFNMSUB231PSr",
+    "VFNMSUB231SDr", "VFNMSUB231SSr")>;
+
+// Latency 5, 1 micro-op; ResourceCycles 1 on BWPort23.
+def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup49], (instregex
+    "LDDQUrm",
+    "MMX_MOVD64from64rm", "MMX_MOVD64rm", "MMX_MOVD64to64rm", "MMX_MOVQ64rm",
+    "MOV(16|32|64)rm", "MOV64toPQIrm", "MOV8rm",
+    "MOVAPDrm", "MOVAPSrm", "MOVDDUPrm", "MOVDI2PDIrm",
+    "MOVDQArm", "MOVDQUrm", "MOVNTDQArm",
+    "MOVSHDUPrm", "MOVSLDUPrm", "MOVSSrm",
+    "MOVSX(16|32|64)rm16", "MOVSX(16|32|64)rm32", "MOVSX(16|32|64)rm8",
+    "MOVUPDrm", "MOVUPSrm",
+    "MOVZX(16|32|64)rm16", "MOVZX(16|32|64)rm8",
+    "PREFETCHNTA", "PREFETCHT0", "PREFETCHT1", "PREFETCHT2",
+    "VBROADCASTSSrm", "VLDDQUrm", "VMOV64toPQIrm", "VMOVAPDrm")>;
+def: InstRW<[BWWriteResGroup49],
(instregex "VMOVAPSrm")>;
+def: InstRW<[BWWriteResGroup49], (instregex
+    "VMOVDDUPrm", "VMOVDI2PDIrm", "VMOVDQArm", "VMOVDQUrm", "VMOVNTDQArm",
+    "VMOVQI2PQIrm", "VMOVSDrm", "VMOVSHDUPrm", "VMOVSLDUPrm", "VMOVSSrm",
+    "VMOVUPDrm", "VMOVUPSrm", "VPBROADCASTDrm", "VPBROADCASTQrm")>;
+
+// Latency 5, 3 micro-ops; ResourceCycles 1 on BWPort1 and 2 on BWPort5.
+def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[BWWriteResGroup50], (instregex
+    "CVTSI2SS64rr", "HADDPDrr", "HADDPSrr", "HSUBPDrr", "HSUBPSrr",
+    "VCVTSI2SS64rr",
+    "VHADDPDYrr", "VHADDPDrr", "VHADDPSYrr", "VHADDPSrr",
+    "VHSUBPDYrr", "VHSUBPDrr", "VHSUBPSYrr", "VHSUBPSrr")>;
+
+// Latency 5, 3 micro-ops over BWPort1, BWPort6 and BWPort06.
+def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let
ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
+
+// Latency 5, 3 micro-ops; ResourceCycles 1 on each of BWPort1, BWPort06 and
+// BWPort0156.
+def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup52], (instregex "MULX32rr")>;
+
+// Latency 5, 4 micro-ops; ResourceCycles 1 on each of the four listed ports.
+def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup53], (instregex
+    "VMASKMOVPDYmr", "VMASKMOVPDmr", "VMASKMOVPSYmr", "VMASKMOVPSmr",
+    "VPMASKMOVDYmr", "VPMASKMOVDmr", "VPMASKMOVQYmr", "VPMASKMOVQmr")>;
+
+// Latency 5, 5 micro-ops; ResourceCycles 1 on BWPort6 and 4 on BWPort0156.
+def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
+  let Latency = 5;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,4];
+}
+def: InstRW<[BWWriteResGroup54], (instregex "PAUSE")>;
+
+// Latency 5, 5 micro-ops; ResourceCycles 1 on BWPort06 and 4 on BWPort0156.
+def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 5;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,4];
+}
+def: InstRW<[BWWriteResGroup55], (instregex "XSETBV")>;
+
+// Same resources as BWWriteResGroup55 but ResourceCycles are [2,3].
+def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 5;
+  let NumMicroOps = 5;
+  let ResourceCycles = [2,3];
+}
+def: InstRW<[BWWriteResGroup56], (instregex
+    "CMPXCHG(16|32|64)rr", "CMPXCHG8rr")>;
+
+// Latency 5, 6 micro-ops; ResourceCycles 1 on BWPort4, 1 on BWPort237 and
+// 4 on BWPort0156.
+def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> {
+  let Latency = 5;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,4];
+}
+def: InstRW<[BWWriteResGroup57], (instregex "PUSHF16", "PUSHF64")>;
+
+// Latency 6 group on BWPort23.
+def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> {
+  let Latency = 6;
+  let NumMicroOps =
1;
+  let ResourceCycles = [1];
+}
+// NOTE(review): the ROUND*/VROUND* patterns at the end of this list end in
+// "r", while every other entry ends in "m" or is a VBROADCAST*128 name -
+// confirm they are meant to share this BWPort23-only group.
+def: InstRW<[BWWriteResGroup58], (instregex
+    "LD_F32m", "LD_F64m", "LD_F80m",
+    "VBROADCASTF128", "VBROADCASTI128",
+    "VBROADCASTSDYrm", "VBROADCASTSSYrm", "VLDDQUYrm",
+    "VMOVAPDYrm", "VMOVAPSYrm", "VMOVDDUPYrm",
+    "VMOVDQAYrm", "VMOVDQUYrm", "VMOVNTDQAYrm",
+    "VMOVSHDUPYrm", "VMOVSLDUPYrm", "VMOVUPDYrm", "VMOVUPSYrm",
+    "VPBROADCASTDYrm", "VPBROADCASTQYrm",
+    "ROUNDPDr", "ROUNDPSr", "ROUNDSDr", "ROUNDSSr",
+    "VROUNDPDr", "VROUNDPSr", "VROUNDSDr", "VROUNDSSr",
+    "VROUNDYPDr", "VROUNDYPSr")>;
+
+// Latency 6, 2 micro-ops over BWPort0 and BWPort23.
+def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let
ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup59], (instregex "CVTPS2PDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "CVTSS2SDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRADrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRAWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PSrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPS2PDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTSS2SDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VPSLLVQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VPSRLVQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VTESTPDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VTESTPSrm")>; + +def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTTPD2DQYrr")>; + +def BWWriteResGroup61 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup61], (instregex "ANDNPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDNPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDPSrm")>; +def: InstRW<[BWWriteResGroup61], 
(instregex "INSERTPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKSSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKSSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKUSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKUSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PALIGNRrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PBLENDWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex 
"PMOVSXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFHWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFLWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "SHUFPDrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "SHUFPSrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDNPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDNPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VINSERTPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVLPDrm")>; +def: InstRW<[BWWriteResGroup61], 
(instregex "VMOVLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKSSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKSSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKUSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKUSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPALIGNRrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPBLENDWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPSmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFHWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFLWmi")>; +def: 
InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VSHUFPDrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VSHUFPSrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VXORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VXORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "XORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "XORPSrm")>; + +def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup62], (instregex "FARJMP64")>; +def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>; + +def BWWriteResGroup63 : SchedWriteRes<[BWPort23,BWPort06]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup63], (instregex "ADC(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADC8rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADCX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADCX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADOX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADOX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVAE(16|32|64)rm")>; 
+def: InstRW<[BWWriteResGroup63], (instregex "CMOVB(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVG(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVGE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVL(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVLE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNO(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNS(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVO(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "RORX32mi")>; +def: InstRW<[BWWriteResGroup63], (instregex "RORX64mi")>; +def: InstRW<[BWWriteResGroup63], (instregex "SARX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SARX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SBB(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SBB8rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHLX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHLX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHRX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHRX64rm")>; + +def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup64], (instregex "ANDN32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "ANDN64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSI32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSI64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK64rm")>; +def: 
InstRW<[BWWriteResGroup64], (instregex "BLSR32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSR64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BZHI32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BZHI64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSBrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSDrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSWrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDQirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PAVGBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PAVGWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMINSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMINUBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBBirm")>; +def: InstRW<[BWWriteResGroup64], 
(instregex "MMX_PSUBDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBQirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBUSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MOVBE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PAVGBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PAVGWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXUDrm")>; +def: InstRW<[BWWriteResGroup64], 
(instregex "PMAXUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSIGNBrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSIGNDrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSIGNWrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPAVGBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPAVGWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQQrm")>; +def: 
InstRW<[BWWriteResGroup64], (instregex "VPCMPEQWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNBrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNDrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNWrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBWrm")>; + +def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup65], (instregex "BLENDPDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "BLENDPSrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PANDNirm")>; +def: InstRW<[BWWriteResGroup65], (instregex 
"MMX_PANDirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PORirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PXORirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PANDNrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PANDrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PXORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VBLENDPDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VBLENDPSrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VINSERTF128rm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VINSERTI128rm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPANDNrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPANDrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPBLENDDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPXORrm")>; + +def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup66], (instregex "ADD(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "ADD8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "AND(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "AND8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8mi")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "OR(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "OR8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)r")>; +def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup66], (instregex "SUB(16|32|64)rm")>; +def: 
InstRW<[BWWriteResGroup66], (instregex "SUB8rm")>;
+def: InstRW<[BWWriteResGroup66], (instregex "TEST(16|32|64)mr")>;
+def: InstRW<[BWWriteResGroup66], (instregex "TEST8mi")>;
+def: InstRW<[BWWriteResGroup66], (instregex "TEST8mr")>;
+def: InstRW<[BWWriteResGroup66], (instregex "XOR(16|32|64)rm")>;
+def: InstRW<[BWWriteResGroup66], (instregex "XOR8rm")>;
+
+def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {  // 4 uops, latency 6.
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];  // 1 on BWPort1, 1 on BWPort06, 2 on BWPort0156.
+}
+def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL")>;
+def: InstRW<[BWWriteResGroup67], (instregex "SHRD(16|32|64)rrCL")>;
+
+def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> {  // 4 uops, latency 6.
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];  // One cycle on each of the four resources.
+}
+def: InstRW<[BWWriteResGroup68], (instregex "SLDT(16|32|64)r")>;
+
+def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {  // 4 uops, latency 6.
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];  // One cycle on each of the four resources.
+}
+def: InstRW<[BWWriteResGroup69], (instregex "BTC(16|32|64)mi8")>;  // Bound here: BTC/BTR/BTS "mi8" and SAR/SHL/SHR "m1"/"mi" patterns.
+def: InstRW<[BWWriteResGroup69], (instregex "BTR(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup69], (instregex "BTS(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(16|32|64)m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR8m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR8mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHL(16|32|64)m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHL(16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHL8m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHL8mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHR(16|32|64)m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHR(16|32|64)mi")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHR8m1")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHR8mi")>;
+
+def BWWriteResGroup70 :
SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "ADD8mi")>; +def: InstRW<[BWWriteResGroup70], (instregex "ADD8mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "AND8mi")>; +def: InstRW<[BWWriteResGroup70], (instregex "AND8mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "DEC(16|32|64)m")>; +def: InstRW<[BWWriteResGroup70], (instregex "DEC8m")>; +def: InstRW<[BWWriteResGroup70], (instregex "INC(16|32|64)m")>; +def: InstRW<[BWWriteResGroup70], (instregex "INC8m")>; +def: InstRW<[BWWriteResGroup70], (instregex "NEG(16|32|64)m")>; +def: InstRW<[BWWriteResGroup70], (instregex "NEG8m")>; +def: InstRW<[BWWriteResGroup70], (instregex "NOT(16|32|64)m")>; +def: InstRW<[BWWriteResGroup70], (instregex "NOT8m")>; +def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "OR8mi")>; +def: InstRW<[BWWriteResGroup70], (instregex "OR8mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "POP(16|32|64)rmm")>; +def: InstRW<[BWWriteResGroup70], (instregex "PUSH(16|32|64)rmm")>; +def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "SUB8mi")>; +def: InstRW<[BWWriteResGroup70], (instregex "SUB8mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup70], (instregex "XOR8mi")>; +def: InstRW<[BWWriteResGroup70], (instregex "XOR8mr")>; + +def 
BWWriteResGroup71 : SchedWriteRes<[BWPort6,BWPort0156]> {  // 6 uops, latency 6.
+  let Latency = 6;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,5];  // 1 cycle on BWPort6, 5 on BWPort0156.
+}
+def: InstRW<[BWWriteResGroup71], (instregex "STD")>;
+
+def BWWriteResGroup72 : SchedWriteRes<[BWPort5]> {  // 1 uop on BWPort5, latency 7.
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup72], (instregex "AESDECLASTrr")>;  // Bound here: the (V)AESDEC*/AESENC* "rr" patterns.
+def: InstRW<[BWWriteResGroup72], (instregex "AESDECrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "AESENCLASTrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "AESENCrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "VAESDECLASTrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "VAESDECrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "VAESENCLASTrr")>;
+def: InstRW<[BWWriteResGroup72], (instregex "VAESENCrr")>;
+
+def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> {  // 2 uops, latency 7.
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];  // One cycle each on BWPort0 and BWPort23.
+}
+def: InstRW<[BWWriteResGroup73], (instregex "VPSLLDYrm")>;  // Bound here: "Yrm" shift and VTESTP* patterns.
+def: InstRW<[BWWriteResGroup73], (instregex "VPSLLQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSLLWYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRADYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRAWYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRLDYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRLQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRLVQYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VPSRLWYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VTESTPDYrm")>;
+def: InstRW<[BWWriteResGroup73], (instregex "VTESTPSYrm")>;
+
+def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> {  // 2 uops, latency 7.
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];  // One cycle each on BWPort1 and BWPort23.
+}
+def: InstRW<[BWWriteResGroup74], (instregex "FCOM32m")>;
+def: InstRW<[BWWriteResGroup74], (instregex "FCOM64m")>;
+def: InstRW<[BWWriteResGroup74], (instregex "FCOMP32m")>;
+def:
InstRW<[BWWriteResGroup74], (instregex "FCOMP64m")>; + +def BWWriteResGroup75 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDNPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VORPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VORPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSWBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKUSDWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKUSWBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPALIGNRYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPBLENDWYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPDYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPSYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFDYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFHWYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFLWYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHQDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLBWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex 
"VSHUFPDYrmi")>;
+// From here on each group binds its opcodes with a single InstRW whose
+// instregex carries the whole pattern list, instead of one InstRW statement
+// per pattern. The pattern strings themselves are unchanged.
+def: InstRW<[BWWriteResGroup75], (instregex
+    "VSHUFPSYrmi", "VUNPCKHPDYrm", "VUNPCKHPSYrm", "VUNPCKLPDYrm",
+    "VUNPCKLPSYrm", "VXORPDYrm", "VXORPSYrm")>;
+
+// Latency 7, 2 micro-ops: one cycle each on BWPort23 and BWPort15.
+def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup76], (instregex
+    "VPABSBYrm", "VPABSDYrm", "VPABSWYrm", "VPADDBYrm",
+    "VPADDDYrm", "VPADDQYrm", "VPADDSBYrm", "VPADDSWYrm",
+    "VPADDUSBYrm", "VPADDUSWYrm", "VPADDWYrm", "VPAVGBYrm",
+    "VPAVGWYrm", "VPCMPEQBYrm", "VPCMPEQDYrm", "VPCMPEQQYrm",
+    "VPCMPEQWYrm", "VPCMPGTBYrm", "VPCMPGTDYrm", "VPCMPGTWYrm",
+    "VPMAXSBYrm", "VPMAXSDYrm", "VPMAXSWYrm", "VPMAXUBYrm",
+    "VPMAXUDYrm", "VPMAXUWYrm", "VPMINSBYrm", "VPMINSDYrm",
+    "VPMINSWYrm", "VPMINUBYrm", "VPMINUDYrm", "VPMINUWYrm",
+    "VPSIGNBYrm256", "VPSIGNDYrm256", "VPSIGNWYrm256", "VPSUBBYrm",
+    "VPSUBDYrm", "VPSUBQYrm", "VPSUBSBYrm", "VPSUBSWYrm",
+    "VPSUBUSBYrm", "VPSUBUSWYrm", "VPSUBWYrm")>;
+
+// Latency 7, 2 micro-ops: one cycle each on BWPort23 and BWPort015.
+def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup77], (instregex
+    "VBLENDPDYrmi", "VBLENDPSYrmi", "VPANDNYrm", "VPANDYrm",
+    "VPBLENDDYrmi", "VPORYrm", "VPXORYrm")>;
+
+// Latency 7, 3 micro-ops: one cycle on BWPort0, two on BWPort5.
+def BWWriteResGroup78 : SchedWriteRes<[BWPort0,BWPort5]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[BWWriteResGroup78], (instregex
+    "MPSADBWrri", "VMPSADBWYrri", "VMPSADBWrri")>;
+
+// Latency 7, 3 micro-ops: two cycles on BWPort5, one on BWPort23.
+def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup79], (instregex
+    "BLENDVPDrm0", "BLENDVPSrm0", "MMX_PACKSSDWirm", "MMX_PACKSSWBirm",
+    "MMX_PACKUSWBirm", "PBLENDVBrm0", "VBLENDVPDrm", "VBLENDVPSrm",
+    "VMASKMOVPDrm", "VMASKMOVPSrm", "VPBLENDVBrm", "VPMASKMOVDrm",
+    "VPMASKMOVQrm")>;
+
+// Latency 7, 3 micro-ops: one cycle on BWPort23, two on BWPort0156.
+def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[BWWriteResGroup80], (instregex
+    "LEAVE64", "SCASB", "SCASL", "SCASQ", "SCASW")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort0, BWPort5 and BWPort23.
+def BWWriteResGroup81 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup81], (instregex
+    "PSLLDrm", "PSLLQrm", "PSLLWrm", "PSRADrm", "PSRAWrm", "PSRLDrm")>;
+// Each group in this stretch binds its opcodes with a single InstRW whose
+// instregex carries the whole pattern list, instead of one InstRW statement
+// per pattern. The pattern strings themselves are unchanged.
+def: InstRW<[BWWriteResGroup81], (instregex
+    "PSRLQrm", "PSRLWrm", "PTESTrm", "VPSLLDrm",
+    "VPSLLQrm", "VPSLLWrm", "VPSRADrm", "VPSRAWrm",
+    "VPSRLDrm", "VPSRLQrm", "VPSRLWrm", "VPTESTrm")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort0, BWPort01 and BWPort23.
+def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup82], (instregex "FLDCW16m")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort0, BWPort23 and BWPort0156.
+def BWWriteResGroup83 : SchedWriteRes<[BWPort0,BWPort23,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup83], (instregex "LDMXCSR", "VLDMXCSR")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort6, BWPort23 and BWPort0156.
+def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup84], (instregex "LRETQ", "RETQ")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort23, BWPort06 and BWPort15.
+def BWWriteResGroup85 : SchedWriteRes<[BWPort23,BWPort06,BWPort15]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup85], (instregex "BEXTR32rm", "BEXTR64rm")>;
+
+// Latency 7, 3 micro-ops: one cycle each on BWPort23, BWPort06 and BWPort0156.
+def BWWriteResGroup86 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup86], (instregex
+    "CMOVA(16|32|64)rm", "CMOVBE(16|32|64)rm")>;
+
+// Latency 7, 5 micro-ops; BWPort06 is charged two cycles, the rest one each.
+def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup87], (instregex
+    "ROL(16|32|64)m1", "ROL(16|32|64)mi", "ROL8m1", "ROL8mi",
+    "ROR(16|32|64)m1", "ROR(16|32|64)mi", "ROR8m1", "ROR8mi")>;
+
+// Latency 7, 5 micro-ops; BWPort0156 is charged two cycles, the rest one each.
+def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup88], (instregex "XADD(16|32|64)rm", "XADD8rm")>;
+
+// Latency 7, 5 micro-ops: one cycle on each of the five listed resources.
+def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m", "FARCALL64")>;
+
+// Latency 7, 7 micro-ops: cycles [2,2,1,2] over the four listed resources.
+def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 7;
+  let ResourceCycles = [2,2,1,2];
+}
+def: InstRW<[BWWriteResGroup90], (instregex "LOOP")>;
+
+// Latency 8, 2 micro-ops: one cycle each on BWPort1 and BWPort23.
+def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+// NOTE(review): in "IMUL(32|64)rm(i8?)" the group makes the `i` mandatory and
+// only the `8` optional, so the pattern does not match an opcode name that
+// ends in plain `rm`. Confirm this is intended rather than `(i8)?`.
+def: InstRW<[BWWriteResGroup91], (instregex
+    "ADDPDrm", "ADDPSrm", "ADDSDrm", "ADDSSrm",
+    "ADDSUBPDrm", "ADDSUBPSrm", "BSF(16|32|64)rm", "BSR(16|32|64)rm",
+    "CMPPDrmi", "CMPPSrmi", "CMPSSrm", "COMISDrm",
+    "COMISSrm", "CVTDQ2PSrm", "CVTPS2DQrm", "CVTTPS2DQrm",
+    "IMUL64m", "IMUL(32|64)rm(i8?)", "IMUL8m", "LZCNT(16|32|64)rm",
+    "MAXPDrm", "MAXPSrm", "MAXSDrm", "MAXSSrm",
+    "MINPDrm", "MINPSrm", "MINSDrm", "MINSSrm",
+    "MMX_CVTPI2PSirm", "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", "MUL64m",
+    "MUL8m", "PDEP32rm", "PDEP64rm", "PEXT32rm",
+    "PEXT64rm", "POPCNT(16|32|64)rm", "SUBPDrm")>;
+// Each group in this stretch binds its opcodes with a single InstRW whose
+// instregex carries the whole pattern list, instead of one InstRW statement
+// per pattern. The pattern strings themselves are unchanged.
+def: InstRW<[BWWriteResGroup91], (instregex
+    "SUBPSrm", "SUBSDrm", "SUBSSrm", "TZCNT(16|32|64)rm",
+    "UCOMISDrm", "UCOMISSrm", "VADDPDrm", "VADDPSrm",
+    "VADDSDrm", "VADDSSrm", "VADDSUBPDrm", "VADDSUBPSrm",
+    "VCMPPDrmi", "VCMPPSrmi", "VCMPSDrm", "VCMPSSrm",
+    "VCOMISDrm", "VCOMISSrm", "VCVTDQ2PSrm", "VCVTPS2DQrm",
+    "VCVTTPS2DQrm", "VMAXPDrm", "VMAXPSrm", "VMAXSDrm",
+    "VMAXSSrm", "VMINPDrm", "VMINPSrm", "VMINSDrm",
+    "VMINSSrm", "VSUBPDrm", "VSUBPSrm", "VSUBSDrm",
+    "VSUBSSrm", "VUCOMISDrm", "VUCOMISSrm")>;
+
+// Latency 8, 3 micro-ops: one cycle each on BWPort1, BWPort0156 and BWPort23.
+// NOTE(review): "(i8?)" makes the `i` mandatory, so this pattern does not
+// match an opcode name ending in plain `rm`. Confirm that is the intent.
+def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup91_16], (instregex "IMUL16rm(i8?)")>;
+
+// Latency 8, 5 micro-ops over the same three resources.
+// NOTE(review): unlike the neighbouring groups there is no ResourceCycles
+// line here, so the class default applies. Confirm that is what is wanted
+// for a 5-micro-op write.
+def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+}
+def: InstRW<[BWWriteResGroup91_16_2], (instregex "IMUL16m", "MUL16m")>;
+
+// Latency 8, 3 micro-ops: one cycle each on BWPort1, BWPort0156 and BWPort23.
+def BWWriteResGroup91_32 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup91_32], (instregex "IMUL32m", "MUL32m")>;
+
+// Latency 8, 2 micro-ops: one cycle each on BWPort5 and BWPort23.
+// NOTE(review): VPMOVZXWDYrm is listed here while the other VPMOVZX*Yrm
+// forms are in BWWriteResGroup102 (latency 9). Confirm the split.
+def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup92], (instregex
+    "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXBWYrm", "VPMOVSXDQYrm",
+    "VPMOVSXWDYrm", "VPMOVSXWQYrm", "VPMOVZXWDYrm")>;
+
+// Latency 8, 2 micro-ops: one cycle each on BWPort01 and BWPort23.
+def BWWriteResGroup93 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup93], (instregex
+    "MULPDrm", "MULPSrm", "MULSDrm", "MULSSrm",
+    "VMULPDrm", "VMULPSrm", "VMULSDrm", "VMULSSrm")>;
+
+// Latency 8, 3 micro-ops: two cycles on BWPort5, one on BWPort23.
+def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup94], (instregex
+    "VBLENDVPDYrm", "VBLENDVPSYrm", "VMASKMOVPDYrm", "VMASKMOVPSYrm",
+    "VPBLENDVBYrm", "VPMASKMOVDYrm", "VPMASKMOVQYrm")>;
+
+// Latency 8, 4 micro-ops: cycles [2,1,1] on BWPort0, BWPort5, BWPort23.
+def BWWriteResGroup95 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup95], (instregex
+    "VPSLLVDrm", "VPSRAVDrm", "VPSRLVDrm")>;
+
+// Latency 8, 4 micro-ops: cycles [2,1,1] on BWPort5, BWPort23, BWPort15.
+def BWWriteResGroup96 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup96], (instregex
+    "MMX_PHADDSWrm64", "MMX_PHADDWrm64", "MMX_PHADDrm64", "MMX_PHSUBDrm64",
+    "MMX_PHSUBSWrm64", "MMX_PHSUBWrm64", "PHADDDrm", "PHADDSWrm128",
+    "PHADDWrm", "PHSUBDrm", "PHSUBSWrm128", "PHSUBWrm",
+    "VPHADDDrm", "VPHADDSWrm128", "VPHADDWrm", "VPHSUBDrm",
+    "VPHSUBSWrm128", "VPHSUBWrm")>;
+
+// Latency 8, 5 micro-ops; BWPort0156 is charged two cycles, the rest one each.
+def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup97], (instregex
+    "RCL(16|32|64)m1", "RCL(16|32|64)mi", "RCL8m1", "RCL8mi",
+    "RCR(16|32|64)m1", "RCR(16|32|64)mi", "RCR8m1", "RCR8mi")>;
+
+// Same resources as BWWriteResGroup97, but here BWPort06 takes the two cycles.
+def BWWriteResGroup98 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,2,1];
+}
+def: InstRW<[BWWriteResGroup98], (instregex "ROR(16|32|64)mCL", "ROR8mCL")>;
+
+// Latency 8, 6 micro-ops; BWPort0156 is charged three cycles, the rest one each.
+def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[BWWriteResGroup99], (instregex
+    "ADC(16|32|64)mi8", "ADC8mi", "ADD8mi", "AND8mi",
+    "OR8mi", "SUB8mi", "XCHG(16|32|64)rm", "XCHG8rm",
+    "XOR8mi")>;
+
+// Latency 8, 6 micro-ops: cycles [1,1,1,2,1] over the five listed resources.
+def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,2,1];
+}
+def: InstRW<[BWWriteResGroup100], (instregex
+    "ADC(16|32|64)mr", "ADC8mr", "CMPXCHG(16|32|64)rm", "CMPXCHG8rm",
+    "ROL(16|32|64)mCL", "ROL8mCL", "SAR(16|32|64)mCL", "SAR8mCL",
+    "SBB(16|32|64)mi8", "SBB(16|32|64)mr", "SBB8mi", "SBB8mr",
+    "SHL(16|32|64)mCL", "SHL8mCL", "SHR(16|32|64)mCL", "SHR8mCL")>;
+
+// Latency 9, 2 micro-ops: one cycle each on BWPort1 and BWPort23.
+def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup101], (instregex
+    "ADD_F32m", "ADD_F64m", "ILD_F16m", "ILD_F32m",
+    "ILD_F64m", "SUBR_F32m", "SUBR_F64m", "SUB_F32m",
+    "SUB_F64m", "VADDPDYrm", "VADDPSYrm", "VADDSUBPDYrm",
+    "VADDSUBPSYrm", "VCMPPDYrmi", "VCMPPSYrmi", "VCVTDQ2PSYrm",
+    "VCVTPS2DQYrm", "VCVTTPS2DQYrm", "VMAXPDYrm", "VMAXPSYrm",
+    "VMINPDYrm", "VMINPSYrm", "VSUBPDYrm", "VSUBPSYrm")>;
+
+// Latency 9, 2 micro-ops: one cycle each on BWPort5 and BWPort23.
+def BWWriteResGroup102 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup102], (instregex
+    "VPERM2F128rm", "VPERM2I128rm", "VPERMDYrm", "VPERMPDYmi",
+    "VPERMPSYrm", "VPERMQYmi", "VPMOVZXBDYrm", "VPMOVZXBQYrm",
+    "VPMOVZXBWYrm", "VPMOVZXDQYrm", "VPMOVZXWQYrm")>;
+
+// Latency 9, 2 micro-ops: one cycle each on BWPort01 and BWPort23.
+def BWWriteResGroup103 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup103], (instregex "VMULPDYrm", "VMULPSYrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on BWPort0, BWPort1 and BWPort5.
+def BWWriteResGroup104 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup104], (instregex "DPPDrri", "VDPPDrri")>;
+
+// Latency 9, 3 micro-ops: one cycle each on BWPort0, BWPort1 and BWPort23.
+def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup105], (instregex
+    "CVTSD2SI64rm", "CVTSD2SIrm", "CVTSS2SI64rm", "CVTSS2SIrm",
+    "CVTTSD2SI64rm", "CVTTSD2SIrm", "CVTTSS2SIrm", "VCVTSD2SI64rm",
+    "VCVTSD2SIrm", "VCVTSS2SI64rm", "VCVTSS2SIrm", "VCVTTSD2SI64rm",
+    "VCVTTSD2SIrm", "VCVTTSS2SI64rm", "VCVTTSS2SIrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on BWPort0, BWPort5 and BWPort23.
+def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup106], (instregex "VCVTPS2PDYrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on BWPort1, BWPort5 and BWPort23.
+def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup107], (instregex
+    "CVTDQ2PDrm", "CVTPD2DQrm", "CVTPD2PSrm", "CVTSD2SSrm",
+    "CVTTPD2DQrm", "MMX_CVTPD2PIirm", "MMX_CVTPI2PDirm", "MMX_CVTTPD2PIirm",
+    "MULX64rm", "VCVTDQ2PDrm", "VCVTSD2SSrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on BWPort5, BWPort23 and BWPort015.
+def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup108], (instregex
+    "VPBROADCASTBYrm", "VPBROADCASTBrm", "VPBROADCASTWYrm", "VPBROADCASTWrm")>;
+
+// Latency 9, 4 micro-ops: cycles [2,1,1] on BWPort0, BWPort5, BWPort23.
+def BWWriteResGroup109 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup109], (instregex
+    "VPSLLVDYrm", "VPSRAVDYrm", "VPSRLVDYrm")>;
+
+// Latency 9, 4 micro-ops: cycles [2,1,1] on BWPort5, BWPort23, BWPort15.
+def BWWriteResGroup110 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup110], (instregex
+    "VPHADDDYrm", "VPHADDSWrm256", "VPHADDWYrm",
+    "VPHSUBDYrm", "VPHSUBSWrm256", "VPHSUBWYrm")>;
+
+// Latency 9, 4 micro-ops: one cycle on each of the four listed resources.
+def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup111], (instregex
+    "SHLD(16|32|64)mri8", "SHRD(16|32|64)mri8")>;
+
+// Latency 9, 5 micro-ops: cycles [1,1,3] on BWPort23, BWPort06, BWPort0156.
+def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 9;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,3];
+}
+def: InstRW<[BWWriteResGroup112], (instregex "RDRAND(16|32|64)r")>;
+
+// Latency 9, 5 micro-ops; BWPort6 is charged two cycles, the rest one each.
+def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
+  let Latency = 9;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[BWWriteResGroup113], (instregex
+    "LAR(16|32|64)rm", "LSL(16|32|64)rm")>;
+
+// Latency 10, 2 micro-ops: two cycles on BWPort0.
+def BWWriteResGroup114 : SchedWriteRes<[BWPort0]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[BWWriteResGroup114], (instregex
+    "PMULLDrr", "VPMULLDYrr", "VPMULLDrr")>;
+
+// Latency 10, 2 micro-ops: one cycle each on BWPort0 and BWPort23.
+def BWWriteResGroup115 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup115], (instregex
+    "MMX_PMADDUBSWrm64", "MMX_PMADDWDirm", "MMX_PMULHRSWrm64", "MMX_PMULHUWirm",
+    "MMX_PMULHWirm", "MMX_PMULLWirm", "MMX_PMULUDQirm", "MMX_PSADBWirm",
+    "PCLMULQDQrm", "PCMPGTQrm", "PHMINPOSUWrm128", "PMADDUBSWrm",
+    "PMADDWDrm", "PMULDQrm", "PMULHRSWrm", "PMULHUWrm",
+    "PMULHWrm", "PMULLWrm", "PMULUDQrm", "PSADBWrm",
+    "RCPPSm", "RCPSSm", "RSQRTPSm", "RSQRTSSm",
+    "VPCLMULQDQrm", "VPCMPGTQrm", "VPHMINPOSUWrm128", "VPMADDUBSWrm",
+    "VPMADDWDrm", "VPMULDQrm", "VPMULHRSWrm", "VPMULHUWrm",
+    "VPMULHWrm", "VPMULLWrm", "VPMULUDQrm", "VPSADBWrm",
+    "VRCPPSm", "VRCPSSm", "VRSQRTPSm", "VRSQRTSSm")>;
+
+// Latency 10, 2 micro-ops: one cycle each on BWPort01 and BWPort23.
+def BWWriteResGroup116 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup116], (instregex
+    "VFMADD132PDm", "VFMADD132PSm", "VFMADD132SDm", "VFMADD132SSm",
+    "VFMADD213PDm", "VFMADD213PSm", "VFMADD213SDm", "VFMADD213SSm",
+    "VFMADD231PDm", "VFMADD231PSm", "VFMADD231SDm", "VFMADD231SSm",
+    "VFMADDSUB132PDm", "VFMADDSUB132PSm", "VFMADDSUB213PDm", "VFMADDSUB213PSm",
+    "VFMADDSUB231PDm", "VFMADDSUB231PSm",
+    "VFMSUB132PDm", "VFMSUB132PSm", "VFMSUB132SDm", "VFMSUB132SSm",
+    "VFMSUB213PDm", "VFMSUB213PSm", "VFMSUB213SDm", "VFMSUB213SSm",
+    "VFMSUB231PDm", "VFMSUB231PSm", "VFMSUB231SDm", "VFMSUB231SSm",
+    "VFMSUBADD132PDm", "VFMSUBADD132PSm", "VFMSUBADD213PDm", "VFMSUBADD213PSm",
+    "VFMSUBADD231PDm", "VFMSUBADD231PSm",
+    "VFNMADD132PDm", "VFNMADD132PSm", "VFNMADD132SDm", "VFNMADD132SSm",
+    "VFNMADD213PDm", "VFNMADD213PSm", "VFNMADD213SDm", "VFNMADD213SSm",
+    "VFNMADD231PDm", "VFNMADD231PSm", "VFNMADD231SDm", "VFNMADD231SSm",
+    "VFNMSUB132PDm", "VFNMSUB132PSm", "VFNMSUB132SDm", "VFNMSUB132SSm",
+    "VFNMSUB213PDm", "VFNMSUB213PSm", "VFNMSUB213SDm", "VFNMSUB213SSm",
+    "VFNMSUB231PDm", "VFNMSUB231PSm", "VFNMSUB231SDm", "VFNMSUB231SSm")>;
+
+// Latency 10, 3 micro-ops: two cycles on BWPort1, one on BWPort23.
+def BWWriteResGroup117 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup117], (instregex
+    "FICOM16m", "FICOM32m", "FICOMP16m", "FICOMP32m")>;
+
+// Latency 10, 3 micro-ops: one cycle each on BWPort0, BWPort5 and BWPort23.
+def BWWriteResGroup118 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup118], (instregex "VPTESTYrm")>;
+
+// Latency 10, 4 micro-ops: cycles [1,2,1] on BWPort1, BWPort5, BWPort23.
+def BWWriteResGroup119 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[BWWriteResGroup119], (instregex
+    "HADDPDrm", "HADDPSrm", "HSUBPDrm", "HSUBPSrm",
+    "VHADDPDrm", "VHADDPSrm", "VHSUBPDrm", "VHSUBPSrm")>;
+
+// Latency 10, 4 micro-ops: one cycle on each of the four listed resources.
+// NOTE(review): only the non-VEX CVTTSS2SI64rm is listed here, while
+// VCVTTSS2SI64rm is in BWWriteResGroup105. Confirm the asymmetry.
+def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
+
+// Latency 10, 4 micro-ops: one cycle on each of the four listed resources.
+def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup121], (instregex "MULX32rm")>;
+
+// Latency 11, 1 micro-op: one cycle on BWPort0.
+def BWWriteResGroup122 : SchedWriteRes<[BWPort0]> {
+  let Latency = 11;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup122], (instregex
+    "DIVPSrr", "DIVSSrr", "VDIVPSrr", "VDIVSSrr")>;
+
+// Latency 11, 2 micro-ops: one cycle each on BWPort0 and BWPort23.
+def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup123], (instregex
+    "MUL_F32m", "MUL_F64m", "VPCMPGTQYrm", "VPMADDUBSWYrm",
+    "VPMADDWDYrm", "VPMULDQYrm", "VPMULHRSWYrm", "VPMULHUWYrm",
+    "VPMULHWYrm", "VPMULLWYrm", "VPMULUDQYrm", "VPSADBWYrm")>;
+
+// Latency 11, 2 micro-ops: one cycle each on BWPort01 and BWPort23.
+def BWWriteResGroup124 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup124], (instregex
+    "VFMADD132PDYm", "VFMADD132PSYm", "VFMADD213PDYm", "VFMADD213PSYm",
+    "VFMADD231PDYm", "VFMADD231PSYm",
+    "VFMADDSUB132PDYm", "VFMADDSUB132PSYm", "VFMADDSUB213PDYm",
+    "VFMADDSUB213PSYm", "VFMADDSUB231PDYm", "VFMADDSUB231PSYm",
+    "VFMSUB132PDYm", "VFMSUB132PSYm", "VFMSUB213PDYm", "VFMSUB213PSYm",
+    "VFMSUB231PDYm", "VFMSUB231PSYm",
+    "VFMSUBADD132PDYm", "VFMSUBADD132PSYm", "VFMSUBADD213PDYm",
+    "VFMSUBADD213PSYm", "VFMSUBADD231PDYm", "VFMSUBADD231PSYm",
+    "VFNMADD132PDYm", "VFNMADD132PSYm", "VFNMADD213PDYm", "VFNMADD213PSYm",
+    "VFNMADD231PDYm", "VFNMADD231PSYm",
+    "VFNMSUB132PDYm", "VFNMSUB132PSYm", "VFNMSUB213PDYm", "VFNMSUB213PSYm",
+    "VFNMSUB231PDYm", "VFNMSUB231PSYm")>;
+
+// Latency 11, 3 micro-ops: three cycles on BWPort0.
+def BWWriteResGroup125 : SchedWriteRes<[BWPort0]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[BWWriteResGroup125], (instregex
+    "PCMPISTRIrr", "PCMPISTRM128rr", "VPCMPISTRIrr", "VPCMPISTRM128rr")>;
+
+// Latency 11, 3 micro-ops: two cycles on BWPort0, one on BWPort015.
+def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr", "VRSQRTPSYr")>;
+
+// Latency 11, 3 micro-ops: two cycles on BWPort1, one on BWPort23.
+def BWWriteResGroup127 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup127], (instregex
+    "ROUNDPDm", "ROUNDPSm", "ROUNDSDm", "ROUNDSSm", "VROUNDPDm")>;
+def: 
InstRW<[BWWriteResGroup127], (instregex "VROUNDPSm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDSDm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDSSm")>; + +def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>; + +def BWWriteResGroup129 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[BWWriteResGroup129], (instregex "VHADDPDYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHADDPSYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHSUBPDYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHSUBPSYrm")>; + +def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,1,2]; +} +def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL")>; +def: InstRW<[BWWriteResGroup130], (instregex "SHRD(16|32|64)mrCL")>; + +def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 7; + let ResourceCycles = [2,2,3]; +} +def: InstRW<[BWWriteResGroup131], (instregex "RCL(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup131], (instregex "RCR(16|32|64)rCL")>; + +def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 9; + let ResourceCycles = [1,4,1,3]; +} +def: InstRW<[BWWriteResGroup132], (instregex "RCL8rCL")>; + +def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 11; + let ResourceCycles = [2,9]; +} +def: InstRW<[BWWriteResGroup133], (instregex "LOOPE")>; +def: InstRW<[BWWriteResGroup133], (instregex "LOOPNE")>; + +def BWWriteResGroup134 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 12; + let NumMicroOps = 2; + let 
ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup134], (instregex "AESDECLASTrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "AESDECrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "AESENCLASTrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "AESENCrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "VAESDECLASTrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "VAESDECrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "VAESENCLASTrm")>; +def: InstRW<[BWWriteResGroup134], (instregex "VAESENCrm")>; + +def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup135], (instregex "ADD_FI16m")>; +def: InstRW<[BWWriteResGroup135], (instregex "ADD_FI32m")>; +def: InstRW<[BWWriteResGroup135], (instregex "SUBR_FI16m")>; +def: InstRW<[BWWriteResGroup135], (instregex "SUBR_FI32m")>; +def: InstRW<[BWWriteResGroup135], (instregex "SUB_FI16m")>; +def: InstRW<[BWWriteResGroup135], (instregex "SUB_FI32m")>; +def: InstRW<[BWWriteResGroup135], (instregex "VROUNDYPDm")>; +def: InstRW<[BWWriteResGroup135], (instregex "VROUNDYPSm")>; + +def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[BWWriteResGroup136], (instregex "MPSADBWrmi")>; +def: InstRW<[BWWriteResGroup136], (instregex "VMPSADBWrmi")>; + +def BWWriteResGroup137 : SchedWriteRes<[BWPort0]> { + let Latency = 13; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup137], (instregex "SQRTPSr")>; +def: InstRW<[BWWriteResGroup137], (instregex "SQRTSSr")>; + +def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>; + +def BWWriteResGroup139 : SchedWriteRes<[BWPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let 
ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup139], (instregex "DIVPDrr")>; +def: InstRW<[BWWriteResGroup139], (instregex "DIVSDrr")>; +def: InstRW<[BWWriteResGroup139], (instregex "VDIVPDrr")>; +def: InstRW<[BWWriteResGroup139], (instregex "VDIVSDrr")>; +def: InstRW<[BWWriteResGroup139], (instregex "VSQRTPSr")>; +def: InstRW<[BWWriteResGroup139], (instregex "VSQRTSSr")>; + +def BWWriteResGroup140 : SchedWriteRes<[BWPort5]> { + let Latency = 14; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup140], (instregex "AESIMCrr")>; +def: InstRW<[BWWriteResGroup140], (instregex "VAESIMCrr")>; + +def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI16m")>; +def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI32m")>; + +def BWWriteResGroup142 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup142], (instregex "DPPSrri")>; +def: InstRW<[BWWriteResGroup142], (instregex "VDPPSYrri")>; +def: InstRW<[BWWriteResGroup142], (instregex "VDPPSrri")>; + +def BWWriteResGroup143 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup143], (instregex "DPPDrmi")>; +def: InstRW<[BWWriteResGroup143], (instregex "VDPPDrmi")>; + +def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> { + let Latency = 14; + let NumMicroOps = 8; + let ResourceCycles = [2,2,1,3]; +} +def: InstRW<[BWWriteResGroup144], (instregex "LAR(16|32|64)rr")>; + +def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> { + let Latency = 14; + let NumMicroOps = 10; + let ResourceCycles = [2,3,1,4]; +} +def: InstRW<[BWWriteResGroup145], (instregex "RCR8rCL")>; + +def BWWriteResGroup146 : 
SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> { + let Latency = 14; + let NumMicroOps = 12; + let ResourceCycles = [2,1,4,5]; +} +def: InstRW<[BWWriteResGroup146], (instregex "XCH_F")>; + +def BWWriteResGroup147 : SchedWriteRes<[BWPort0]> { + let Latency = 15; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup147], (instregex "DIVR_FPrST0")>; +def: InstRW<[BWWriteResGroup147], (instregex "DIVR_FST0r")>; +def: InstRW<[BWWriteResGroup147], (instregex "DIVR_FrST0")>; + +def BWWriteResGroup148 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup148], (instregex "PMULLDrm")>; +def: InstRW<[BWWriteResGroup148], (instregex "VPMULLDrm")>; + +def BWWriteResGroup149 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> { + let Latency = 15; + let NumMicroOps = 10; + let ResourceCycles = [1,1,1,4,1,2]; +} +def: InstRW<[BWWriteResGroup149], (instregex "RCL(16|32|64)mCL")>; +def: InstRW<[BWWriteResGroup149], (instregex "RCL8mCL")>; + +def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup150], (instregex "DIVPSrm")>; +def: InstRW<[BWWriteResGroup150], (instregex "DIVSSrm")>; +def: InstRW<[BWWriteResGroup150], (instregex "VDIVPSrm")>; +def: InstRW<[BWWriteResGroup150], (instregex "VDIVSSrm")>; + +def BWWriteResGroup151 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 16; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup151], (instregex "VPMULLDYrm")>; + +def BWWriteResGroup152 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[BWWriteResGroup152], (instregex "PCMPISTRIrm")>; +def: InstRW<[BWWriteResGroup152], (instregex "PCMPISTRM128rm")>; +def: InstRW<[BWWriteResGroup152], (instregex "VPCMPISTRIrm")>; 
+def: InstRW<[BWWriteResGroup152], (instregex "VPCMPISTRM128rm")>; + +def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> { + let Latency = 16; + let NumMicroOps = 14; + let ResourceCycles = [1,1,1,4,2,5]; +} +def: InstRW<[BWWriteResGroup153], (instregex "CMPXCHG8B")>; + +def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> { + let Latency = 16; + let NumMicroOps = 16; + let ResourceCycles = [16]; +} +def: InstRW<[BWWriteResGroup154], (instregex "VZEROALL")>; + +def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015]> { + let Latency = 17; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>; + +def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { + let Latency = 17; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm")>; +def: InstRW<[BWWriteResGroup156], (instregex "VRSQRTPSYm")>; + +def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup157], (instregex "SQRTPSm")>; +def: InstRW<[BWWriteResGroup157], (instregex "SQRTSSm")>; + +def BWWriteResGroup158 : SchedWriteRes<[BWPort0,BWPort5,BWPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [4,3,1]; +} +def: InstRW<[BWWriteResGroup158], (instregex "PCMPESTRIrr")>; +def: InstRW<[BWWriteResGroup158], (instregex "VPCMPESTRIrr")>; + +def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,5]; +} +def: InstRW<[BWWriteResGroup159], (instregex "CPUID")>; +def: InstRW<[BWWriteResGroup159], (instregex "RDTSC")>; + +def BWWriteResGroup160 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> { + let Latency = 18; + let NumMicroOps = 11; + let ResourceCycles = [2,1,1,3,1,3]; +} +def: 
InstRW<[BWWriteResGroup160], (instregex "RCR(16|32|64)mCL")>; +def: InstRW<[BWWriteResGroup160], (instregex "RCR8mCL")>; + +def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 19; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup161], (instregex "DIVPDrm")>; +def: InstRW<[BWWriteResGroup161], (instregex "DIVSDrm")>; +def: InstRW<[BWWriteResGroup161], (instregex "VDIVPDrm")>; +def: InstRW<[BWWriteResGroup161], (instregex "VDIVSDrm")>; +def: InstRW<[BWWriteResGroup161], (instregex "VSQRTPSm")>; +def: InstRW<[BWWriteResGroup161], (instregex "VSQRTSSm")>; + +def BWWriteResGroup162 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 19; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup162], (instregex "AESIMCrm")>; +def: InstRW<[BWWriteResGroup162], (instregex "VAESIMCrm")>; + +def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { + let Latency = 19; + let NumMicroOps = 5; + let ResourceCycles = [2,1,1,1]; +} +def: InstRW<[BWWriteResGroup163], (instregex "DPPSrmi")>; +def: InstRW<[BWWriteResGroup163], (instregex "VDPPSrmi")>; + +def BWWriteResGroup164 : SchedWriteRes<[BWPort0,BWPort5,BWPort015,BWPort0156]> { + let Latency = 19; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[BWWriteResGroup164], (instregex "PCMPESTRM128rr")>; +def: InstRW<[BWWriteResGroup164], (instregex "VPCMPESTRM128rr")>; + +def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> { + let Latency = 20; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0")>; +def: InstRW<[BWWriteResGroup165], (instregex "DIV_FST0r")>; +def: InstRW<[BWWriteResGroup165], (instregex "DIV_FrST0")>; +def: InstRW<[BWWriteResGroup165], (instregex "SQRTPDr")>; +def: InstRW<[BWWriteResGroup165], (instregex "SQRTSDr")>; + +def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { + let Latency = 20; + let 
NumMicroOps = 5; + let ResourceCycles = [2,1,1,1]; +} +def: InstRW<[BWWriteResGroup166], (instregex "VDPPSYrmi")>; + +def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> { + let Latency = 20; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,1,2]; +} +def: InstRW<[BWWriteResGroup167], (instregex "INSB")>; +def: InstRW<[BWWriteResGroup167], (instregex "INSL")>; +def: InstRW<[BWWriteResGroup167], (instregex "INSW")>; + +def BWWriteResGroup168 : SchedWriteRes<[BWPort0]> { + let Latency = 21; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup168], (instregex "VSQRTPDr")>; +def: InstRW<[BWWriteResGroup168], (instregex "VSQRTSDr")>; + +def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup169], (instregex "DIV_F32m")>; +def: InstRW<[BWWriteResGroup169], (instregex "DIV_F64m")>; + +def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015]> { + let Latency = 21; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>; + +def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> { + let Latency = 21; + let NumMicroOps = 19; + let ResourceCycles = [2,1,4,1,1,4,6]; +} +def: InstRW<[BWWriteResGroup171], (instregex "CMPXCHG16B")>; + +def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { + let Latency = 22; + let NumMicroOps = 18; + let ResourceCycles = [1,1,16]; +} +def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>; + +def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>; + +def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { + let Latency = 23; + let NumMicroOps = 4; + let 
ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>; + +def BWWriteResGroup175 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort0156]> { + let Latency = 23; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[BWWriteResGroup175], (instregex "PCMPESTRIrm")>; +def: InstRW<[BWWriteResGroup175], (instregex "VPCMPESTRIrm")>; + +def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { + let Latency = 23; + let NumMicroOps = 19; + let ResourceCycles = [3,1,15]; +} +// "(64)?" makes the whole suffix optional, so plain XRSTOR matches as well as XRSTOR64. +def: InstRW<[BWWriteResGroup176], (instregex "XRSTOR(64)?")>; + +def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { + let Latency = 24; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI16m")>; +def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI32m")>; + +def BWWriteResGroup178 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort015,BWPort0156]> { + let Latency = 24; + let NumMicroOps = 10; + let ResourceCycles = [4,3,1,1,1]; +} +def: InstRW<[BWWriteResGroup178], (instregex "PCMPESTRM128rm")>; +def: InstRW<[BWWriteResGroup178], (instregex "VPCMPESTRM128rm")>; + +def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup179], (instregex "SQRTPDm")>; +def: InstRW<[BWWriteResGroup179], (instregex "SQRTSDm")>; + +def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 26; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F32m")>; +def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F64m")>; +def: InstRW<[BWWriteResGroup180], (instregex "VSQRTPDm")>; +def: InstRW<[BWWriteResGroup180], (instregex "VSQRTSDm")>; + +def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { + let Latency = 27; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: 
InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>; + +def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { + let Latency = 29; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI16m")>; +def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI32m")>; + +def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { + let Latency = 29; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>; + +def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 22; + let NumMicroOps = 7; + let ResourceCycles = [1,3,2,1]; +} +def: InstRW<[BWWriteResGroup183_1], (instregex "VGATHERQPDrm")>; + +def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 23; + let NumMicroOps = 9; + let ResourceCycles = [1,3,4,1]; +} +def: InstRW<[BWWriteResGroup183_2], (instregex "VGATHERQPDYrm")>; + +def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 24; + let NumMicroOps = 9; + let ResourceCycles = [1,5,2,1]; +} +def: InstRW<[BWWriteResGroup183_3], (instregex "VGATHERQPSYrm")>; + +def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 25; + let NumMicroOps = 7; + let ResourceCycles = [1,3,2,1]; +} +def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPDrm")>; +def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPSrm")>; + +def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 26; + let NumMicroOps = 9; + let ResourceCycles = [1,5,2,1]; +} +def: InstRW<[BWWriteResGroup183_5], (instregex "VGATHERDPDYrm")>; + +def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 26; + let NumMicroOps = 14; + let ResourceCycles = [1,4,8,1]; +} +def: InstRW<[BWWriteResGroup183_6], (instregex 
"VGATHERDPSYrm")>; + +def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { + let Latency = 27; + let NumMicroOps = 9; + let ResourceCycles = [1,5,2,1]; +} +def: InstRW<[BWWriteResGroup183_7], (instregex "VGATHERQPSrm")>; + +def BWWriteResGroup184 : SchedWriteRes<[BWPort0,BWPort5,BWPort015]> { + let Latency = 29; + let NumMicroOps = 11; + let ResourceCycles = [2,7,2]; +} +def: InstRW<[BWWriteResGroup184], (instregex "AESKEYGENASSIST128rr")>; +def: InstRW<[BWWriteResGroup184], (instregex "VAESKEYGENASSIST128rr")>; + +def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> { + let Latency = 29; + let NumMicroOps = 27; + let ResourceCycles = [1,5,1,1,19]; +} +def: InstRW<[BWWriteResGroup185], (instregex "XSAVE64")>; + +def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> { + let Latency = 30; + let NumMicroOps = 28; + let ResourceCycles = [1,6,1,1,19]; +} +// "(OPT)?" makes the whole suffix optional, so plain XSAVE matches as well as XSAVEOPT. +def: InstRW<[BWWriteResGroup186], (instregex "XSAVE(OPT)?")>; + +def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> { + let Latency = 31; + let NumMicroOps = 31; + let ResourceCycles = [8,1,21,1]; +} +def: InstRW<[BWWriteResGroup187], (instregex "MMX_EMMS")>; + +def BWWriteResGroup188 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort015]> { + let Latency = 33; + let NumMicroOps = 11; + let ResourceCycles = [2,7,1,1]; +} +def: InstRW<[BWWriteResGroup188], (instregex "AESKEYGENASSIST128rm")>; +def: InstRW<[BWWriteResGroup188], (instregex "VAESKEYGENASSIST128rm")>; + +def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015]> { + let Latency = 34; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>; + +def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> { + let Latency = 34; + let NumMicroOps = 8; + let ResourceCycles = [2,2,2,1,1]; +} +def: InstRW<[BWWriteResGroup190], (instregex 
"DIV(16|32|64)m")>; +def: InstRW<[BWWriteResGroup190], (instregex "DIV8m")>; + +def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> { + let Latency = 34; + let NumMicroOps = 23; + let ResourceCycles = [1,5,3,4,10]; +} +def: InstRW<[BWWriteResGroup191], (instregex "IN32ri")>; +def: InstRW<[BWWriteResGroup191], (instregex "IN32rr")>; +def: InstRW<[BWWriteResGroup191], (instregex "IN8ri")>; +def: InstRW<[BWWriteResGroup191], (instregex "IN8rr")>; + +def BWWriteResGroup193 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> { + let Latency = 35; + let NumMicroOps = 8; + let ResourceCycles = [2,2,2,1,1]; +} +def: InstRW<[BWWriteResGroup193], (instregex "IDIV(16|32|64)m")>; +def: InstRW<[BWWriteResGroup193], (instregex "IDIV8m")>; + +def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> { + let Latency = 35; + let NumMicroOps = 23; + let ResourceCycles = [1,5,2,1,4,10]; +} +def: InstRW<[BWWriteResGroup194], (instregex "OUT32ir")>; +def: InstRW<[BWWriteResGroup194], (instregex "OUT32rr")>; +def: InstRW<[BWWriteResGroup194], (instregex "OUT8ir")>; +def: InstRW<[BWWriteResGroup194], (instregex "OUT8rr")>; + +def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { + let Latency = 40; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>; + +def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> { + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; +} +def: InstRW<[BWWriteResGroup196], (instregex "RDTSCP")>; + +// FLDENVm: 64 micro-ops, 60-cycle latency. +def BWWriteResGroup197 : SchedWriteRes<[BWPort0,BWPort01,BWPort23,BWPort05,BWPort06,BWPort015,BWPort0156]> { + let Latency = 60; + let NumMicroOps = 64; + let ResourceCycles = [2,2,8,1,10,2,39]; +} +def: InstRW<[BWWriteResGroup197], (instregex "FLDENVm")>; + +def BWWriteResGroup198 : 
SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> { + let Latency = 63; + let NumMicroOps = 88; + let ResourceCycles = [4,4,31,1,2,1,45]; +} +def: InstRW<[BWWriteResGroup198], (instregex "FXRSTOR64")>; + +def BWWriteResGroup199 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> { + let Latency = 63; + let NumMicroOps = 90; + let ResourceCycles = [4,2,33,1,2,1,47]; +} +def: InstRW<[BWWriteResGroup199], (instregex "FXRSTOR")>; + +def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> { + let Latency = 75; + let NumMicroOps = 15; + let ResourceCycles = [6,3,6]; +} +def: InstRW<[BWWriteResGroup200], (instregex "FNINIT")>; + +def BWWriteResGroup201 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156]> { + let Latency = 80; + let NumMicroOps = 32; + let ResourceCycles = [7,7,3,3,1,11]; +} +def: InstRW<[BWWriteResGroup201], (instregex "DIV(16|32|64)r")>; + +// FSTENVm: 100 micro-ops, 115-cycle latency. +def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> { + let Latency = 115; + let NumMicroOps = 100; + let ResourceCycles = [9,9,11,8,1,11,21,30]; +} +def: InstRW<[BWWriteResGroup202], (instregex "FSTENVm")>; + +} // SchedModel + diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 8a67606dca94..fcf9f4ff6384 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -663,10 +663,10 @@ def GenericPostRAModel : GenericX86Model { include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" +include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" include "X86SchedSkylakeServer.td" - diff --git a/llvm/test/CodeGen/X86/aes-schedule.ll b/llvm/test/CodeGen/X86/aes-schedule.ll index f74065ba5e59..ba22f175757f 100644 --- 
a/llvm/test/CodeGen/X86/aes-schedule.ll +++ b/llvm/test/CodeGen/X86/aes-schedule.ll @@ -38,8 +38,8 @@ define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_aesdec: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_aesdec: ; SKYLAKE: # BB#0: @@ -93,8 +93,8 @@ define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-LABEL: test_aesdeclast: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_aesdeclast: ; SKYLAKE: # BB#0: @@ -148,8 +148,8 @@ define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_aesenc: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_aesenc: ; SKYLAKE: # BB#0: @@ -203,8 +203,8 @@ define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-LABEL: test_aesenclast: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; BROADWELL-NEXT: retq # sched: 
[7:1.00] ; ; SKYLAKE-LABEL: test_aesenclast: ; SKYLAKE: # BB#0: @@ -262,9 +262,9 @@ define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) { ; BROADWELL-LABEL: test_aesimc: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] +; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [19:2.00] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_aesimc: ; SKYLAKE: # BB#0: @@ -326,9 +326,9 @@ define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) { ; BROADWELL-LABEL: test_aeskeygenassist: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00] -; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [28:7.00] +; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [33:7.00] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_aeskeygenassist: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 0e351bc0a336..d4367b36f278 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -31,8 +31,8 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_addpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: @@ -85,8 +85,8 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_addps: ; BROADWELL: # BB#0: ; 
BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: @@ -139,8 +139,8 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BROADWELL-LABEL: test_addsubpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: @@ -194,8 +194,8 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BROADWELL-LABEL: test_addsubps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: @@ -252,9 +252,9 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: @@ -321,9 +321,9 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BROADWELL-LABEL: test_andnotps: 
; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: @@ -390,9 +390,9 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_andpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: @@ -457,9 +457,9 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_andps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: @@ -525,8 +525,8 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: @@ -584,8 +584,8 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * ; BROADWELL-LABEL: test_blendps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: @@ -638,8 +638,8 @@ define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BROADWELL-LABEL: test_blendvpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: @@ -693,8 +693,8 @@ define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BROADWELL-LABEL: test_blendvps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: @@ -744,8 +744,8 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; ; BROADWELL-LABEL: test_broadcastf128: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: 
vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastf128: ; SKYLAKE: # BB#0: @@ -789,8 +789,8 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; ; BROADWELL-LABEL: test_broadcastsd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: @@ -835,8 +835,8 @@ define <4 x float> @test_broadcastss(float *%a0) { ; ; BROADWELL-LABEL: test_broadcastss: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: @@ -881,8 +881,8 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { ; ; BROADWELL-LABEL: test_broadcastss_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: @@ -934,9 +934,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_cmppd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] ; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: @@ -1002,9 +1002,9 @@ define <8 x float> @test_cmpps(<8 x float> 
%a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_cmpps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] ; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: @@ -1070,9 +1070,9 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: @@ -1135,9 +1135,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; BROADWELL-LABEL: test_cvtdq2ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: @@ -1198,9 +1198,9 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL-LABEL: test_cvtpd2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: @@ -1261,9 +1261,9 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL-LABEL: test_cvtpd2ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: @@ -1324,9 +1324,9 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; BROADWELL-LABEL: test_cvtps2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00] ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # BB#0: @@ -1383,9 +1383,9 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; BROADWELL-LABEL: test_divpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00] -; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:2.00] +; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: @@ -1437,9 +1437,9 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; BROADWELL-LABEL: test_divps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00] -; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: 
[21:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [17:2.00] +; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: @@ -1492,8 +1492,8 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; BROADWELL-LABEL: test_dpps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] -; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: @@ -1552,7 +1552,7 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_extractf128: ; SKYLAKE: # BB#0: @@ -1608,8 +1608,8 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; BROADWELL-LABEL: test_haddpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # BB#0: @@ -1663,8 +1663,8 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; BROADWELL-LABEL: test_haddps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: @@ -1718,8 +1718,8 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; BROADWELL-LABEL: test_hsubpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: @@ -1773,8 +1773,8 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; BROADWELL-LABEL: test_hsubps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: @@ -1831,9 +1831,9 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; BROADWELL-LABEL: test_insertf128: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_insertf128: ; SKYLAKE: # BB#0: @@ -1889,8 +1889,8 @@ define <32 x i8> @test_lddqu(i8* %a0) { ; ; BROADWELL-LABEL: test_lddqu: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: 
[6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: @@ -1940,10 +1940,10 @@ define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; ; BROADWELL-LABEL: test_maskmovpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] +; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovpd: ; SKYLAKE: # BB#0: @@ -2003,10 +2003,10 @@ define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2 ; ; BROADWELL-LABEL: test_maskmovpd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] -; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] +; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovpd_ymm: ; SKYLAKE: # BB#0: @@ -2066,10 +2066,10 @@ define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { ; ; BROADWELL-LABEL: test_maskmovps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00] +; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovps: ; SKYLAKE: 
# BB#0: @@ -2129,10 +2129,10 @@ define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) ; ; BROADWELL-LABEL: test_maskmovps_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [2:2.00] -; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00] +; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovps_ymm: ; SKYLAKE: # BB#0: @@ -2190,8 +2190,8 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_maxpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: @@ -2245,8 +2245,8 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_maxps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: @@ -2300,8 +2300,8 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_minpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: 
[9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: @@ -2355,8 +2355,8 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_minps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: @@ -2412,10 +2412,10 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_movapd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: @@ -2474,10 +2474,10 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_movaps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: @@ -2537,9 +2537,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL-LABEL: test_movddup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50] 
; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: @@ -2598,7 +2598,7 @@ define i32 @test_movmskpd(<4 x double> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: @@ -2650,7 +2650,7 @@ define i32 @test_movmskps(<8 x float> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: @@ -2702,7 +2702,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: @@ -2755,7 +2755,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: @@ -2810,9 +2810,9 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; BROADWELL-LABEL: test_movshdup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] +; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50] 
; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: @@ -2873,9 +2873,9 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; BROADWELL-LABEL: test_movsldup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] +; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: @@ -2937,10 +2937,10 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_movupd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: @@ -3001,10 +3001,10 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_movups: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: @@ -3060,9 +3060,9 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; BROADWELL-LABEL: test_mulpd: ; BROADWELL: # BB#0: 
-; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: @@ -3114,9 +3114,9 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; BROADWELL-LABEL: test_mulps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: @@ -3172,9 +3172,9 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; BROADWELL-LABEL: orpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: orpd: ; SKYLAKE: # BB#0: @@ -3239,9 +3239,9 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; BROADWELL-LABEL: test_orps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_orps: ; 
SKYLAKE: # BB#0: @@ -3306,9 +3306,9 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou ; BROADWELL-LABEL: test_perm2f128: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_perm2f128: ; SKYLAKE: # BB#0: @@ -3369,9 +3369,9 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_permilpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilpd: ; SKYLAKE: # BB#0: @@ -3432,9 +3432,9 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL-LABEL: test_permilpd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] +; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilpd_ymm: ; SKYLAKE: # BB#0: @@ -3495,9 +3495,9 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_permilps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; 
BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilps: ; SKYLAKE: # BB#0: @@ -3558,9 +3558,9 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; BROADWELL-LABEL: test_permilps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilps_ymm: ; SKYLAKE: # BB#0: @@ -3618,8 +3618,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; BROADWELL-LABEL: test_permilvarpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilvarpd: ; SKYLAKE: # BB#0: @@ -3673,8 +3673,8 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; BROADWELL-LABEL: test_permilvarpd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilvarpd_ymm: ; SKYLAKE: # BB#0: @@ -3728,8 +3728,8 @@ define <4 x 
float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; BROADWELL-LABEL: test_permilvarps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilvarps: ; SKYLAKE: # BB#0: @@ -3783,8 +3783,8 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; BROADWELL-LABEL: test_permilvarps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permilvarps_ymm: ; SKYLAKE: # BB#0: @@ -3840,10 +3840,10 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_rcpps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00] +; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: @@ -3904,10 +3904,10 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [5:1.25] -; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: @@ -3968,10 +3968,10 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [5:1.25] -; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [6:2.00] +; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] +; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: @@ -4032,10 +4032,10 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_rsqrtps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00] +; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00] ; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: @@ -4097,9 +4097,9 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; BROADWELL-LABEL: test_shufpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: @@ -4157,8 +4157,8 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; BROADWELL-LABEL: 
test_shufps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: @@ -4213,10 +4213,10 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00] -; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [40:2.00] +; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [34:2.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # BB#0: @@ -4277,10 +4277,10 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; ; BROADWELL-LABEL: test_sqrtps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:2.00] ; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: @@ -4339,8 +4339,8 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_subpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: 
[7:1.00] ; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: @@ -4393,8 +4393,8 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_subps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: @@ -4458,9 +4458,9 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] ; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testpd: ; SKYLAKE: # BB#0: @@ -4541,10 +4541,10 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] ; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] ; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testpd_ymm: ; SKYLAKE: # BB#0: @@ -4625,9 +4625,9 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] ; BROADWELL-NEXT: vtestps 
%xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testps: ; SKYLAKE: # BB#0: @@ -4708,10 +4708,10 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] ; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] ; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_testps_ymm: ; SKYLAKE: # BB#0: @@ -4784,9 +4784,9 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BROADWELL-LABEL: test_unpckhpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: @@ -4844,8 +4844,8 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BROADWELL-LABEL: test_unpckhps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: 
[1:1.00] -; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # BB#0: @@ -4901,9 +4901,9 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; BROADWELL-LABEL: test_unpcklpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: @@ -4961,8 +4961,8 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; BROADWELL-LABEL: test_unpcklps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: @@ -5018,9 +5018,9 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; BROADWELL-LABEL: test_xorpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), 
%ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: @@ -5085,9 +5085,9 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; BROADWELL-LABEL: test_xorps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: @@ -5146,7 +5146,7 @@ define void @test_zeroall() { ; BROADWELL-LABEL: test_zeroall: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vzeroall # sched: [16:16.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_zeroall: ; SKYLAKE: # BB#0: @@ -5191,7 +5191,7 @@ define void @test_zeroupper() { ; BROADWELL-LABEL: test_zeroupper: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_zeroupper: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 741eb62050e9..cec8ca94409d 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -21,9 +21,9 @@ define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { ; ; BROADWELL-LABEL: test_broadcasti128: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] +; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50] ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcasti128: ; SKYLAKE: # BB#0: @@ -65,7 +65,7 @@ define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastsd_ymm: ; SKYLAKE: # BB#0: @@ -106,7 +106,7 @@ define <4 x float> @test_broadcastss(<4 x float> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastss: ; SKYLAKE: # BB#0: @@ -147,7 +147,7 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_broadcastss_ymm: ; SKYLAKE: # BB#0: @@ -197,7 +197,7 @@ define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) ; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_extracti128: ; SKYLAKE: # BB#0: @@ -246,8 +246,8 @@ define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 ; ; BROADWELL-LABEL: test_gatherdpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
-; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherdpd: ; SKYLAKE: # BB#0: @@ -281,8 +281,8 @@ define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2 ; ; BROADWELL-LABEL: test_gatherdpd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherdpd_ymm: ; SKYLAKE: # BB#0: @@ -316,8 +316,8 @@ define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x ; ; BROADWELL-LABEL: test_gatherdps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherdps: ; SKYLAKE: # BB#0: @@ -351,8 +351,8 @@ define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, ; ; BROADWELL-LABEL: test_gatherdps_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherdps_ymm: ; SKYLAKE: # BB#0: @@ -386,8 +386,8 @@ define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 ; ; BROADWELL-LABEL: test_gatherqpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
-; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherqpd: ; SKYLAKE: # BB#0: @@ -421,8 +421,8 @@ define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2 ; ; BROADWELL-LABEL: test_gatherqpd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherqpd_ymm: ; SKYLAKE: # BB#0: @@ -456,8 +456,8 @@ define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x ; ; BROADWELL-LABEL: test_gatherqps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherqps: ; SKYLAKE: # BB#0: @@ -493,9 +493,9 @@ define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, ; ; BROADWELL-LABEL: test_gatherqps_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?] 
+; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_gatherqps_ymm: ; SKYLAKE: # BB#0: @@ -537,9 +537,9 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; BROADWELL-LABEL: test_inserti128: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_inserti128: ; SKYLAKE: # BB#0: @@ -583,8 +583,8 @@ define <4 x i64> @test_movntdqa(i8* %a0) { ; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: @@ -621,8 +621,8 @@ define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_mpsadbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] -; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: @@ -667,9 +667,9 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { ; BROADWELL-LABEL: test_pabsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: 
vpabsb (%rdi), %ymm1 # sched: [7:0.50] ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: @@ -717,9 +717,9 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { ; BROADWELL-LABEL: test_pabsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50] ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: @@ -767,9 +767,9 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { ; BROADWELL-LABEL: test_pabsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50] ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: @@ -815,8 +815,8 @@ define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_packssdw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: @@ -859,8 +859,8 @@ define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_packsswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, 
%ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: @@ -903,8 +903,8 @@ define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_packusdw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: @@ -947,8 +947,8 @@ define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_packuswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: @@ -991,8 +991,8 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_paddb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: @@ -1033,8 +1033,8 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_paddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: @@ -1075,8 +1075,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_paddq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: @@ -1117,8 +1117,8 @@ define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_paddsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: @@ -1160,8 +1160,8 @@ define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_paddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: @@ -1203,8 +1203,8 @@ define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_paddusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 
# sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: @@ -1246,8 +1246,8 @@ define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_paddusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: @@ -1289,8 +1289,8 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; BROADWELL-LABEL: test_paddw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: @@ -1331,8 +1331,8 @@ define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_palignr: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: @@ -1375,9 +1375,9 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: 
test_pand: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: @@ -1424,9 +1424,9 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_pandn: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: @@ -1473,8 +1473,8 @@ define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pavgb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: @@ -1525,8 +1525,8 @@ define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; BROADWELL-LABEL: test_pavgw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: @@ -1579,9 +1579,9 @@ define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> 
%a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pblendd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] -; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] +; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendd: ; SKYLAKE: # BB#0: @@ -1628,9 +1628,9 @@ define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; BROADWELL-LABEL: test_pblendd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] -; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] +; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendd_ymm: ; SKYLAKE: # BB#0: @@ -1675,8 +1675,8 @@ define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 ; BROADWELL-LABEL: test_pblendvb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: @@ -1718,8 +1718,8 @@ define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pblendw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; BROADWELL-NEXT: 
vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: @@ -1762,9 +1762,9 @@ define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pbroadcastb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastb: ; SKYLAKE: # BB#0: @@ -1811,9 +1811,9 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { ; BROADWELL-LABEL: test_pbroadcastb_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastb_ymm: ; SKYLAKE: # BB#0: @@ -1860,9 +1860,9 @@ define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_pbroadcastd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastd: ; SKYLAKE: # BB#0: @@ -1908,9 +1908,9 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { ; BROADWELL-LABEL: test_pbroadcastd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastd_ymm: ; SKYLAKE: # BB#0: @@ -1956,9 +1956,9 @@ define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { ; BROADWELL-LABEL: test_pbroadcastq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastq: ; SKYLAKE: # BB#0: @@ -2004,9 +2004,9 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { ; BROADWELL-LABEL: test_pbroadcastq_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastq_ymm: ; SKYLAKE: # BB#0: @@ -2052,9 +2052,9 @@ define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pbroadcastw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; 
BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastw: ; SKYLAKE: # BB#0: @@ -2101,9 +2101,9 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { ; BROADWELL-LABEL: test_pbroadcastw_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00] +; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pbroadcastw_ymm: ; SKYLAKE: # BB#0: @@ -2148,8 +2148,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pcmpeqb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: @@ -2194,8 +2194,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pcmpeqd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: @@ -2240,8 +2240,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_pcmpeqq: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: @@ -2286,8 +2286,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pcmpeqw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: @@ -2332,8 +2332,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pcmpgtb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: @@ -2378,8 +2378,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pcmpgtd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: @@ -2424,8 +2424,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_pcmpgtq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: 
vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # BB#0: @@ -2470,8 +2470,8 @@ define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pcmpgtw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: @@ -2518,9 +2518,9 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_perm2i128: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_perm2i128: ; SKYLAKE: # BB#0: @@ -2567,9 +2567,9 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_permd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permd: ; SKYLAKE: # BB#0: @@ -2617,9 +2617,9 @@ define <4 x double> 
@test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; BROADWELL-LABEL: test_permpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permpd: ; SKYLAKE: # BB#0: @@ -2666,9 +2666,9 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; BROADWELL-LABEL: test_permps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permps: ; SKYLAKE: # BB#0: @@ -2716,9 +2716,9 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { ; BROADWELL-LABEL: test_permq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] +; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_permq: ; SKYLAKE: # BB#0: @@ -2760,8 +2760,8 @@ define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32 ; ; BROADWELL-LABEL: test_pgatherdd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
-; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherdd: ; SKYLAKE: # BB#0: @@ -2795,8 +2795,8 @@ define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x ; ; BROADWELL-LABEL: test_pgatherdd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherdd_ymm: ; SKYLAKE: # BB#0: @@ -2830,8 +2830,8 @@ define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64 ; ; BROADWELL-LABEL: test_pgatherdq: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherdq: ; SKYLAKE: # BB#0: @@ -2865,8 +2865,8 @@ define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x ; ; BROADWELL-LABEL: test_pgatherdq_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherdq_ymm: ; SKYLAKE: # BB#0: @@ -2900,8 +2900,8 @@ define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32 ; ; BROADWELL-LABEL: test_pgatherqd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] 
-; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherqd: ; SKYLAKE: # BB#0: @@ -2937,9 +2937,9 @@ define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x ; ; BROADWELL-LABEL: test_pgatherqd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?] +; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherqd_ymm: ; SKYLAKE: # BB#0: @@ -2976,8 +2976,8 @@ define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64 ; ; BROADWELL-LABEL: test_pgatherqq: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherqq: ; SKYLAKE: # BB#0: @@ -3011,8 +3011,8 @@ define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x ; ; BROADWELL-LABEL: test_pgatherqq_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?] 
-; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pgatherqq_ymm: ; SKYLAKE: # BB#0: @@ -3049,8 +3049,8 @@ define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_phaddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: @@ -3092,8 +3092,8 @@ define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_phaddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: @@ -3135,8 +3135,8 @@ define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_phaddw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: @@ -3178,8 +3178,8 @@ define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_phsubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: @@ -3221,8 +3221,8 @@ define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_phsubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: @@ -3264,8 +3264,8 @@ define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_phsubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: @@ -3307,8 +3307,8 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) ; BROADWELL-LABEL: test_pmaddubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: @@ -3351,8 +3351,8 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmaddwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaddwd 
(%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: @@ -3396,10 +3396,10 @@ define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { ; ; BROADWELL-LABEL: test_pmaskmovd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] +; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaskmovd: ; SKYLAKE: # BB#0: @@ -3445,10 +3445,10 @@ define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { ; ; BROADWELL-LABEL: test_pmaskmovd_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00] -; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] +; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaskmovd_ymm: ; SKYLAKE: # BB#0: @@ -3494,10 +3494,10 @@ define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { ; ; BROADWELL-LABEL: test_pmaskmovq: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00] +; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_pmaskmovq: ; SKYLAKE: # BB#0: @@ -3543,10 +3543,10 @@ define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { ; ; BROADWELL-LABEL: test_pmaskmovq_ymm: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00] -; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00] +; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00] +; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaskmovq_ymm: ; SKYLAKE: # BB#0: @@ -3591,8 +3591,8 @@ define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pmaxsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: @@ -3634,8 +3634,8 @@ define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pmaxsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: @@ -3677,8 +3677,8 @@ define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmaxsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxsw 
(%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: @@ -3720,8 +3720,8 @@ define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pmaxub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: @@ -3763,8 +3763,8 @@ define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pmaxud: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: @@ -3806,8 +3806,8 @@ define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmaxuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: @@ -3849,8 +3849,8 @@ define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pminsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: 
retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: @@ -3892,8 +3892,8 @@ define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pminsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: @@ -3935,8 +3935,8 @@ define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pminsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: @@ -3978,8 +3978,8 @@ define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pminub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: @@ -4021,8 +4021,8 @@ define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pminud: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminud: ; 
SKYLAKE: # BB#0: @@ -4064,8 +4064,8 @@ define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pminuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: @@ -4108,7 +4108,7 @@ define i32 @test_pmovmskb(<32 x i8> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: @@ -4150,9 +4150,9 @@ define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: @@ -4201,9 +4201,9 @@ define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: @@ -4252,9 +4252,9 @@ define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbw: 
; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: @@ -4301,9 +4301,9 @@ define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_pmovsxdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: @@ -4350,9 +4350,9 @@ define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovsxwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: @@ -4399,9 +4399,9 @@ define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovsxwq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: @@ -4450,9 +4450,9 @@ define 
<8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: @@ -4501,9 +4501,9 @@ define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # 
sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: @@ -4552,9 +4552,9 @@ define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00] ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: @@ -4601,9 +4601,9 @@ define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_pmovzxdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: @@ -4650,9 +4650,9 @@ define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovzxwd: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: @@ -4699,9 +4699,9 @@ define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovzxwq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: @@ -4748,8 +4748,8 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pmuldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: @@ -4792,8 +4792,8 @@ 
define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2 ; BROADWELL-LABEL: test_pmulhrsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: @@ -4835,8 +4835,8 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmulhuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: @@ -4878,8 +4878,8 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmulhw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: @@ -4921,8 +4921,8 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pmulld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] -; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: @@ -4963,8 +4963,8 @@ define <16 x i16> @test_pmullw(<16 
x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_pmullw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: @@ -5005,8 +5005,8 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_pmuludq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: @@ -5051,9 +5051,9 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_por: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: @@ -5098,8 +5098,8 @@ define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_psadbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: @@ -5142,8 +5142,8 @@ define <32 x i8> @test_pshufb(<32 x i8> 
%a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_pshufb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: @@ -5187,9 +5187,9 @@ define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { ; BROADWELL-LABEL: test_pshufd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00] ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: @@ -5236,9 +5236,9 @@ define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { ; BROADWELL-LABEL: test_pshufhw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00] ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: @@ -5285,9 +5285,9 @@ define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { ; BROADWELL-LABEL: test_pshuflw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] +; 
BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00] ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: @@ -5332,8 +5332,8 @@ define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_psignb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: @@ -5375,8 +5375,8 @@ define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_psignd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: @@ -5418,8 +5418,8 @@ define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_psignw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: @@ -5463,9 +5463,9 @@ define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pslld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpslld (%rdi), 
%ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: @@ -5509,7 +5509,7 @@ define <32 x i8> @test_pslldq(<32 x i8> %a0) { ; BROADWELL-LABEL: test_pslldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: @@ -5547,9 +5547,9 @@ define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psllq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: @@ -5595,8 +5595,8 @@ define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psllvd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllvd: ; SKYLAKE: # BB#0: @@ -5638,8 +5638,8 @@ define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; BROADWELL-LABEL: test_psllvd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsllvd (%rdi), 
%ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllvd_ymm: ; SKYLAKE: # BB#0: @@ -5681,8 +5681,8 @@ define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psllvq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllvq: ; SKYLAKE: # BB#0: @@ -5724,8 +5724,8 @@ define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; BROADWELL-LABEL: test_psllvq_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllvq_ymm: ; SKYLAKE: # BB#0: @@ -5769,9 +5769,9 @@ define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psllw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: @@ -5819,9 +5819,9 @@ define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psrad: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrad 
(%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: @@ -5867,8 +5867,8 @@ define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psravd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psravd: ; SKYLAKE: # BB#0: @@ -5910,8 +5910,8 @@ define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; BROADWELL-LABEL: test_psravd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psravd_ymm: ; SKYLAKE: # BB#0: @@ -5955,9 +5955,9 @@ define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psraw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: @@ -6005,9 +6005,9 @@ define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psrld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrld %xmm1, 
%ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: @@ -6051,7 +6051,7 @@ define <32 x i8> @test_psrldq(<32 x i8> %a0) { ; BROADWELL-LABEL: test_psrldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: @@ -6089,9 +6089,9 @@ define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psrlq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: @@ -6137,8 +6137,8 @@ define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psrlvd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlvd: ; SKYLAKE: # BB#0: @@ -6180,8 +6180,8 @@ define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) ; BROADWELL-LABEL: test_psrlvd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlvd %ymm1, 
%ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlvd_ymm: ; SKYLAKE: # BB#0: @@ -6223,8 +6223,8 @@ define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psrlvq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlvq: ; SKYLAKE: # BB#0: @@ -6266,8 +6266,8 @@ define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; BROADWELL-LABEL: test_psrlvq_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlvq_ymm: ; SKYLAKE: # BB#0: @@ -6311,9 +6311,9 @@ define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psrlw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: @@ -6359,8 +6359,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_psubb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubb 
%ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: @@ -6401,8 +6401,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_psubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: @@ -6443,8 +6443,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_psubq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: @@ -6485,8 +6485,8 @@ define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_psubsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: @@ -6528,8 +6528,8 @@ define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_psubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsw (%rdi), 
%ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: @@ -6571,8 +6571,8 @@ define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_psubusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: @@ -6614,8 +6614,8 @@ define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; BROADWELL-LABEL: test_psubusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: @@ -6657,8 +6657,8 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; BROADWELL-LABEL: test_psubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: @@ -6699,8 +6699,8 @@ define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_punpckhbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: @@ -6745,10 +6745,10 @@ define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_punpckhdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: @@ -6798,9 +6798,9 @@ define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; 
BROADWELL-LABEL: test_punpckhqdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: @@ -6845,8 +6845,8 @@ define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; BROADWELL-LABEL: test_punpckhwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: @@ -6887,8 +6887,8 @@ define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; BROADWELL-LABEL: test_punpcklbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: @@ -6933,10 +6933,10 @@ define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; BROADWELL-LABEL: test_punpckldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: @@ -6986,9 +6986,9 @@ define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) ; BROADWELL-LABEL: test_punpcklqdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] ; BROADWELL-NEXT: vpaddq %ymm0, 
%ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: @@ -7033,8 +7033,8 @@ define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a ; BROADWELL-LABEL: test_punpcklwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: @@ -7077,9 +7077,9 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; BROADWELL-LABEL: test_pxor: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/bmi-schedule.ll b/llvm/test/CodeGen/X86/bmi-schedule.ll index c61f5a4fbf6f..2ba1c454f756 100644 --- a/llvm/test/CodeGen/X86/bmi-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi-schedule.ll @@ -30,10 +30,10 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: andnl %esi, %edi, %eax # sched: [1:0.50] ; BROADWELL-NEXT: notl %edi # sched: 
[1:0.25] -; BROADWELL-NEXT: andw (%rdx), %di # sched: [1:0.50] +; BROADWELL-NEXT: andw (%rdx), %di # sched: [6:0.50] ; BROADWELL-NEXT: addl %edi, %eax # sched: [1:0.25] ; BROADWELL-NEXT: # kill: %AX %AX %EAX -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andn_i16: ; SKYLAKE: # BB#0: @@ -87,9 +87,9 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_andn_i32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [1:0.50] +; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andn_i32: ; SKYLAKE: # BB#0: @@ -137,9 +137,9 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_andn_i64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [1:0.50] +; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andn_i64: ; SKYLAKE: # BB#0: @@ -186,10 +186,10 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) { ; ; BROADWELL-LABEL: test_bextr_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [2:0.50] +; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] ; BROADWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bextr_i32: ; SKYLAKE: # BB#0: @@ -236,10 +236,10 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) { ; ; BROADWELL-LABEL: test_bextr_i64: ; BROADWELL: # BB#0: 
-; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [2:0.50] +; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50] ; BROADWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bextr_i64: ; SKYLAKE: # BB#0: @@ -286,10 +286,10 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_blsi_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] ; BROADWELL-NEXT: blsil %edi, %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsi_i32: ; SKYLAKE: # BB#0: @@ -337,10 +337,10 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_blsi_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] ; BROADWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsi_i64: ; SKYLAKE: # BB#0: @@ -388,10 +388,10 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_blsmsk_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsmskl (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] ; BROADWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsmsk_i32: ; SKYLAKE: # BB#0: @@ -439,10 +439,10 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_blsmsk_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsmskq (%rsi), %rcx # 
sched: [1:0.50] +; BROADWELL-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] ; BROADWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsmsk_i64: ; SKYLAKE: # BB#0: @@ -490,10 +490,10 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_blsr_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] ; BROADWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsr_i32: ; SKYLAKE: # BB#0: @@ -541,10 +541,10 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_blsr_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] ; BROADWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blsr_i64: ; SKYLAKE: # BB#0: @@ -594,11 +594,11 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) { ; ; BROADWELL-LABEL: test_cttz_i16: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] ; BROADWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: # kill: %AX %AX %EAX -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cttz_i16: ; SKYLAKE: # BB#0: @@ -648,10 +648,10 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_cttz_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [3:1.00] +; 
BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] ; BROADWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cttz_i32: ; SKYLAKE: # BB#0: @@ -698,10 +698,10 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_cttz_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] ; BROADWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] ; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cttz_i64: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll index f77ace21eca5..f645ddfefdb2 100644 --- a/llvm/test/CodeGen/X86/bmi2-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll @@ -23,10 +23,10 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { ; ; BROADWELL-LABEL: test_bzhi_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [1:0.50] +; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] ; BROADWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bzhi_i32: ; SKYLAKE: # BB#0: @@ -73,10 +73,10 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) { ; ; BROADWELL-LABEL: test_bzhi_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [1:0.50] +; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] ; BROADWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_bzhi_i64: ; 
SKYLAKE: # BB#0: @@ -132,9 +132,9 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-NEXT: movq %rdx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: movq %rdi, %rdx # sched: [1:0.25] ; BROADWELL-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [4:1.00] +; BROADWELL-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00] ; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulx_i64: ; SKYLAKE: # BB#0: @@ -193,10 +193,10 @@ define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) { ; ; BROADWELL-LABEL: test_pdep_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] ; BROADWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pdep_i32: ; SKYLAKE: # BB#0: @@ -243,10 +243,10 @@ define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) { ; ; BROADWELL-LABEL: test_pdep_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] ; BROADWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pdep_i64: ; SKYLAKE: # BB#0: @@ -293,10 +293,10 @@ define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) { ; ; BROADWELL-LABEL: test_pext_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [3:1.00] +; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] ; BROADWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pext_i32: ; SKYLAKE: # BB#0: @@ -343,10 +343,10 @@ define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) { ; ; BROADWELL-LABEL: test_pext_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [3:1.00] +; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] ; BROADWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pext_i64: ; SKYLAKE: # BB#0: @@ -394,9 +394,9 @@ define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_rorx_i32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rorx_i32: ; SKYLAKE: # BB#0: @@ -447,9 +447,9 @@ define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_rorx_i64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rorx_i64: ; SKYLAKE: # BB#0: @@ -500,9 +500,9 @@ define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_sarx_i32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sarx_i32: ; SKYLAKE: # BB#0: @@ -549,9 +549,9 @@ define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_sarx_i64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sarx_i64: ; SKYLAKE: # BB#0: @@ -598,9 +598,9 @@ define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_shlx_i32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shlx_i32: ; SKYLAKE: # BB#0: @@ -647,9 +647,9 @@ define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_shlx_i64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shlx_i64: ; SKYLAKE: # BB#0: @@ -696,9 +696,9 @@ define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_shrx_i32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [1:0.50] +; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: 
retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shrx_i32: ; SKYLAKE: # BB#0: @@ -745,9 +745,9 @@ define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_shrx_i64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [1:0.50] +; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shrx_i64: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/f16c-schedule.ll b/llvm/test/CodeGen/X86/f16c-schedule.ll index 88ec56e211bd..1d0236e44303 100644 --- a/llvm/test/CodeGen/X86/f16c-schedule.ll +++ b/llvm/test/CodeGen/X86/f16c-schedule.ll @@ -31,10 +31,10 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { ; ; BROADWELL-LABEL: test_vcvtph2ps_128: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00] ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vcvtph2ps_128: ; SKYLAKE: # BB#0: @@ -88,10 +88,10 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { ; ; BROADWELL-LABEL: test_vcvtph2ps_256: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00] ; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00] ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vcvtph2ps_256: ; SKYLAKE: # BB#0: @@ -144,7 +144,7 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, 
<4 x float> %a1, <4 x i16> ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] ; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vcvtps2ph_128: ; SKYLAKE: # BB#0: @@ -196,9 +196,9 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> ; BROADWELL-LABEL: test_vcvtps2ph_256: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [6:1.00] +; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vcvtps2ph_256: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll index b22d04233c10..121807a697e7 100644 --- a/llvm/test/CodeGen/X86/fma-schedule.ll +++ b/llvm/test/CodeGen/X86/fma-schedule.ll @@ -31,8 +31,8 @@ define <2 x double> @test_vfmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmadd213pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213pd: ; SKYLAKE: # BB#0: @@ -79,8 +79,8 @@ define <4 x double> @test_vfmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; BROADWELL-LABEL: test_vfmadd213pd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213pd_ymm: ; SKYLAKE: # BB#0: @@ -127,8 +127,8 @@ define <4 x float> @test_vfmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmadd213ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213ps: ; SKYLAKE: # BB#0: @@ -175,8 +175,8 @@ define <8 x float> @test_vfmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfmadd213ps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213ps_ymm: ; SKYLAKE: # BB#0: @@ -223,8 +223,8 @@ define <2 x double> @test_vfmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmadd213sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213sd: ; SKYLAKE: # BB#0: @@ -271,8 +271,8 @@ define <4 x float> @test_vfmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmadd213ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmadd213ss 
(%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmadd213ss: ; SKYLAKE: # BB#0: @@ -331,8 +331,8 @@ define <2 x double> @test_vfmaddsubpd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmaddsubpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubpd: ; SKYLAKE: # BB#0: @@ -379,8 +379,8 @@ define <4 x double> @test_vfmaddsubpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; BROADWELL-LABEL: test_vfmaddsubpd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubpd_ymm: ; SKYLAKE: # BB#0: @@ -427,8 +427,8 @@ define <4 x float> @test_vfmaddsubps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmaddsubps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubps: ; SKYLAKE: # BB#0: @@ -475,8 +475,8 @@ define <8 x float> @test_vfmaddsubps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfmaddsubps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: 
[5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmaddsubps_ymm: ; SKYLAKE: # BB#0: @@ -535,8 +535,8 @@ define <2 x double> @test_vfmsubaddpd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmsubaddpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddpd: ; SKYLAKE: # BB#0: @@ -583,8 +583,8 @@ define <4 x double> @test_vfmsubaddpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; BROADWELL-LABEL: test_vfmsubaddpd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddpd_ymm: ; SKYLAKE: # BB#0: @@ -631,8 +631,8 @@ define <4 x float> @test_vfmsubaddps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmsubaddps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddps: ; SKYLAKE: # BB#0: @@ -679,8 +679,8 @@ define <8 x float> @test_vfmsubaddps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfmsubaddps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsubadd213ps %ymm2, %ymm1, 
%ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsubaddps_ymm: ; SKYLAKE: # BB#0: @@ -739,8 +739,8 @@ define <2 x double> @test_vfmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmsub213pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213pd: ; SKYLAKE: # BB#0: @@ -787,8 +787,8 @@ define <4 x double> @test_vfmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4 ; BROADWELL-LABEL: test_vfmsub213pd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213pd_ymm: ; SKYLAKE: # BB#0: @@ -835,8 +835,8 @@ define <4 x float> @test_vfmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmsub213ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213ps: ; SKYLAKE: # BB#0: @@ -883,8 +883,8 @@ define <8 x float> @test_vfmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfmsub213ps_ymm: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213ps_ymm: ; SKYLAKE: # BB#0: @@ -931,8 +931,8 @@ define <2 x double> @test_vfmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x d ; BROADWELL-LABEL: test_vfmsub213sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213sd: ; SKYLAKE: # BB#0: @@ -979,8 +979,8 @@ define <4 x float> @test_vfmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa ; BROADWELL-LABEL: test_vfmsub213ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfmsub213ss: ; SKYLAKE: # BB#0: @@ -1039,8 +1039,8 @@ define <2 x double> @test_vfnmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x ; BROADWELL-LABEL: test_vfnmadd213pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213pd: ; SKYLAKE: # BB#0: @@ -1087,8 +1087,8 @@ define <4 x double> @test_vfnmadd213pd_ymm(<4 x double> %a0, <4 x double> 
%a1, < ; BROADWELL-LABEL: test_vfnmadd213pd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213pd_ymm: ; SKYLAKE: # BB#0: @@ -1135,8 +1135,8 @@ define <4 x float> @test_vfnmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo ; BROADWELL-LABEL: test_vfnmadd213ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213ps: ; SKYLAKE: # BB#0: @@ -1183,8 +1183,8 @@ define <8 x float> @test_vfnmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfnmadd213ps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213ps_ymm: ; SKYLAKE: # BB#0: @@ -1231,8 +1231,8 @@ define <2 x double> @test_vfnmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x ; BROADWELL-LABEL: test_vfnmadd213sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213sd: ; SKYLAKE: # BB#0: @@ -1279,8 +1279,8 
@@ define <4 x float> @test_vfnmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo ; BROADWELL-LABEL: test_vfnmadd213ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmadd213ss: ; SKYLAKE: # BB#0: @@ -1339,8 +1339,8 @@ define <2 x double> @test_vfnmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x ; BROADWELL-LABEL: test_vfnmsub213pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsub213pd: ; SKYLAKE: # BB#0: @@ -1387,8 +1387,8 @@ define <4 x double> @test_vfnmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, < ; BROADWELL-LABEL: test_vfnmsub213pd_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsub213pd_ymm: ; SKYLAKE: # BB#0: @@ -1435,8 +1435,8 @@ define <4 x float> @test_vfnmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo ; BROADWELL-LABEL: test_vfnmsub213ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_vfnmsub213ps: ; SKYLAKE: # BB#0: @@ -1483,8 +1483,8 @@ define <8 x float> @test_vfnmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x ; BROADWELL-LABEL: test_vfnmsub213ps_ymm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsub213ps_ymm: ; SKYLAKE: # BB#0: @@ -1531,8 +1531,8 @@ define <2 x double> @test_vfnmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x ; BROADWELL-LABEL: test_vfnmsub213sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsub213sd: ; SKYLAKE: # BB#0: @@ -1579,8 +1579,8 @@ define <4 x float> @test_vfnmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo ; BROADWELL-LABEL: test_vfnmsub213ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_vfnmsub213ss: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/lea32-schedule.ll b/llvm/test/CodeGen/X86/lea32-schedule.ll index 074aede4e2db..18a165009ea1 100644 --- a/llvm/test/CodeGen/X86/lea32-schedule.ll +++ b/llvm/test/CodeGen/X86/lea32-schedule.ll @@ -52,7 +52,7 @@ define i32 @test_lea_offset(i32) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal -24(%rdi), %eax # sched: 
[1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: @@ -116,7 +116,7 @@ define i32 @test_lea_offset_big(i32) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: @@ -187,7 +187,7 @@ define i32 @test_lea_add(i32, i32) { ; BROADWELL-NEXT: # kill: %ESI %ESI %RSI ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: @@ -264,7 +264,7 @@ define i32 @test_lea_add_offset(i32, i32) { ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl $16, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: @@ -347,7 +347,7 @@ define i32 @test_lea_add_offset_big(i32, i32) { ; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl $-4096, %eax # imm = 0xF000 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: @@ -417,7 +417,7 @@ define i32 @test_lea_mul(i32) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul: ; SKYLAKE: # BB#0: @@ -485,7 +485,7 @@ define i32 @test_lea_mul_offset(i32) { ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; 
BROADWELL-NEXT: addl $-32, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: @@ -559,7 +559,7 @@ define i32 @test_lea_mul_offset_big(i32) { ; BROADWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl $10000, %eax # imm = 0x2710 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: @@ -632,7 +632,7 @@ define i32 @test_lea_add_scale(i32, i32) { ; BROADWELL-NEXT: # kill: %ESI %ESI %RSI ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: @@ -710,7 +710,7 @@ define i32 @test_lea_add_scale_offset(i32, i32) { ; BROADWELL-NEXT: # kill: %EDI %EDI %RDI ; BROADWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl $96, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: @@ -794,7 +794,7 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) { ; BROADWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] ; BROADWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/lea64-schedule.ll b/llvm/test/CodeGen/X86/lea64-schedule.ll index a7128ad8a2e2..1177645a6983 100644 --- a/llvm/test/CodeGen/X86/lea64-schedule.ll +++ b/llvm/test/CodeGen/X86/lea64-schedule.ll @@ -46,7 +46,7 @@ define i64 @test_lea_offset(i64) { ; BROADWELL-LABEL: test_lea_offset: ; BROADWELL: # BB#0: ; 
BROADWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_offset: ; SKYLAKE: # BB#0: @@ -101,7 +101,7 @@ define i64 @test_lea_offset_big(i64) { ; BROADWELL-LABEL: test_lea_offset_big: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_offset_big: ; SKYLAKE: # BB#0: @@ -157,7 +157,7 @@ define i64 @test_lea_add(i64, i64) { ; BROADWELL-LABEL: test_lea_add: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add: ; SKYLAKE: # BB#0: @@ -216,7 +216,7 @@ define i64 @test_lea_add_offset(i64, i64) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $16, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_offset: ; SKYLAKE: # BB#0: @@ -281,7 +281,7 @@ define i64 @test_lea_add_offset_big(i64, i64) { ; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $-4096, %rax # imm = 0xF000 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_offset_big: ; SKYLAKE: # BB#0: @@ -339,7 +339,7 @@ define i64 @test_lea_mul(i64) { ; BROADWELL-LABEL: test_lea_mul: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul: ; SKYLAKE: # BB#0: @@ -398,7 +398,7 @@ define i64 @test_lea_mul_offset(i64) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $-32, %rax # 
sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul_offset: ; SKYLAKE: # BB#0: @@ -463,7 +463,7 @@ define i64 @test_lea_mul_offset_big(i64) { ; BROADWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $10000, %rax # imm = 0x2710 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_mul_offset_big: ; SKYLAKE: # BB#0: @@ -521,7 +521,7 @@ define i64 @test_lea_add_scale(i64, i64) { ; BROADWELL-LABEL: test_lea_add_scale: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale: ; SKYLAKE: # BB#0: @@ -581,7 +581,7 @@ define i64 @test_lea_add_scale_offset(i64, i64) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $96, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale_offset: ; SKYLAKE: # BB#0: @@ -647,7 +647,7 @@ define i64 @test_lea_add_scale_offset_big(i64, i64) { ; BROADWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] ; BROADWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 ; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lea_add_scale_offset_big: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/lzcnt-schedule.ll b/llvm/test/CodeGen/X86/lzcnt-schedule.ll index 6e4b86a58488..d50fad7535e6 100644 --- a/llvm/test/CodeGen/X86/lzcnt-schedule.ll +++ b/llvm/test/CodeGen/X86/lzcnt-schedule.ll @@ -26,11 +26,11 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) { ; ; BROADWELL-LABEL: test_ctlz_i16: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [3:1.00] +; 
BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] ; BROADWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: # kill: %AX %AX %EAX -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctlz_i16: ; SKYLAKE: # BB#0: @@ -80,10 +80,10 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_ctlz_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] ; BROADWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctlz_i32: ; SKYLAKE: # BB#0: @@ -130,10 +130,10 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_ctlz_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] ; BROADWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] ; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctlz_i64: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 0b6a0c11bb9e..7f7c00d075ba 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -54,11 +54,11 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; ; BROADWELL-LABEL: test_cvtpd2pi: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [9:1.00] ; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] ; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2pi: ; SKYLAKE: # BB#0: @@ -139,9 +139,9 @@ define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { ; BROADWELL-LABEL: test_cvtpi2pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpi2pd: ; SKYLAKE: # BB#0: @@ -217,9 +217,9 @@ define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 ; BROADWELL-LABEL: test_cvtpi2ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpi2ps: ; SKYLAKE: # BB#0: @@ -300,10 +300,10 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; BROADWELL-LABEL: test_cvtps2pi: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtps2pi: ; SKYLAKE: # BB#0: @@ -388,11 +388,11 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; ; BROADWELL-LABEL: test_cvttpd2pi: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [4:1.00] +; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [9:1.00] ; 
BROADWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] ; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttpd2pi: ; SKYLAKE: # BB#0: @@ -478,10 +478,10 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; BROADWELL-LABEL: test_cvttps2pi: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [3:1.00] +; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] ; BROADWELL-NEXT: movd %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttps2pi: ; SKYLAKE: # BB#0: @@ -552,7 +552,7 @@ define void @test_emms() optsize { ; BROADWELL-LABEL: test_emms: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: emms # sched: [31:10.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_emms: ; SKYLAKE: # BB#0: @@ -607,7 +607,7 @@ define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize { ; BROADWELL-LABEL: test_maskmovq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovq: ; SKYLAKE: # BB#0: @@ -708,15 +708,15 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 # sched: [5:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 
; BROADWELL-NEXT: vmovlps %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [1:0.50] +; BROADWELL-NEXT: paddd -{{[0-9]+}}(%rsp), %mm1 # sched: [6:0.50] ; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movd %mm1, %ecx # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] ; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # BB#0: @@ -829,7 +829,7 @@ define i64 @test_movdq2q(<2 x i64> %a0) optsize { ; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] ; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movdq2q: ; SKYLAKE: # BB#0: @@ -894,7 +894,7 @@ define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize { ; BROADWELL-LABEL: test_movntq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntq: ; SKYLAKE: # BB#0: @@ -960,10 +960,10 @@ define void @test_movq(i64 *%a0) { ; ; BROADWELL-LABEL: test_movq: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] ; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movq: ; SKYLAKE: # BB#0: @@ -1029,7 +1029,7 @@ define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize { ; BROADWELL-LABEL: test_movq2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_movq2dq: ; SKYLAKE: # BB#0: @@ -1093,10 +1093,10 @@ define i64 @test_pabsb(x86_mmx *%a0) optsize { ; ; BROADWELL-LABEL: test_pabsb: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: @@ -1171,10 +1171,10 @@ define i64 @test_pabsd(x86_mmx *%a0) optsize { ; ; BROADWELL-LABEL: test_pabsd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: @@ -1249,10 +1249,10 @@ define i64 @test_pabsw(x86_mmx *%a0) optsize { ; ; BROADWELL-LABEL: test_pabsw: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: @@ -1328,9 +1328,9 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_packssdw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: @@ -1406,9 +1406,9 @@ 
define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_packsswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: @@ -1484,9 +1484,9 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_packuswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [2:2.00] +; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: @@ -1562,9 +1562,9 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: @@ -1640,9 +1640,9 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # 
BB#0: @@ -1718,9 +1718,9 @@ define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: @@ -1796,9 +1796,9 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: @@ -1874,9 +1874,9 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: @@ -1952,9 +1952,9 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_paddusb: ; SKYLAKE: # BB#0: @@ -2030,9 +2030,9 @@ define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: @@ -2108,9 +2108,9 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_paddw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: @@ -2186,9 +2186,9 @@ define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_palignr: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: @@ -2264,9 +2264,9 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pand: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: 
[7:1.00] ; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: @@ -2342,9 +2342,9 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pandn: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: @@ -2420,9 +2420,9 @@ define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pavgb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: @@ -2498,9 +2498,9 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pavgw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: @@ -2576,9 +2576,9 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpeqb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # 
sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: @@ -2654,9 +2654,9 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpeqd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: @@ -2732,9 +2732,9 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpeqw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # BB#0: @@ -2810,9 +2810,9 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpgtb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: @@ -2888,9 +2888,9 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpgtd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: @@ -2966,9 +2966,9 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pcmpgtw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: @@ -3034,7 +3034,7 @@ define i32 @test_pextrw(x86_mmx %a0) optsize { ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: @@ -3099,9 +3099,9 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phaddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: @@ -3177,9 +3177,9 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phaddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: @@ -3255,9 +3255,9 @@ define i64 
@test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phaddw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: @@ -3333,9 +3333,9 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phsubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: @@ -3411,9 +3411,9 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phsubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: @@ -3489,9 +3489,9 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_phsubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [3:2.00] +; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: @@ 
-3572,10 +3572,10 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; BROADWELL-LABEL: test_pinsrw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [4:0.50] +; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] ; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # BB#0: @@ -3656,9 +3656,9 @@ define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmaddwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: @@ -3734,9 +3734,9 @@ define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmaddubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: @@ -3812,9 +3812,9 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmaxsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: 
[2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: @@ -3890,9 +3890,9 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmaxub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: @@ -3968,9 +3968,9 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pminsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: @@ -4046,9 +4046,9 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pminub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: @@ -4113,8 +4113,8 @@ define i32 @test_pmovmskb(x86_mmx %a0) optsize { ; ; BROADWELL-LABEL: test_pmovmskb: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: @@ 
-4179,9 +4179,9 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmulhrsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: @@ -4257,9 +4257,9 @@ define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmulhw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: @@ -4335,9 +4335,9 @@ define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmulhuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: @@ -4413,9 +4413,9 @@ define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmullw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: @@ -4491,9 +4491,9 @@ define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pmuludq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: @@ -4569,9 +4569,9 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_por: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: @@ -4647,9 +4647,9 @@ define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psadbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [5:1.00] +; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: @@ -4725,9 +4725,9 @@ define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { ; BROADWELL-LABEL: test_pshufb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: 
[7:1.00] ; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: @@ -4802,10 +4802,10 @@ define i64 @test_pshufw(x86_mmx *%a0) optsize { ; ; BROADWELL-LABEL: test_pshufw: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] +; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufw: ; SKYLAKE: # BB#0: @@ -4881,9 +4881,9 @@ define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psignb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: @@ -4959,9 +4959,9 @@ define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psignd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: @@ -5037,9 +5037,9 @@ define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psignw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: @@ -5120,10 +5120,10 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pslld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: @@ -5210,10 +5210,10 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psllq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: @@ -5300,10 +5300,10 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psllw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: @@ -5390,10 +5390,10 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psrad: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; 
BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: @@ -5480,10 +5480,10 @@ define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psraw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: @@ -5570,10 +5570,10 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psrld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: @@ -5660,10 +5660,10 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psrlq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: 
@@ -5750,10 +5750,10 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psrlw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [1:1.00] +; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: @@ -5835,9 +5835,9 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: @@ -5913,9 +5913,9 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: @@ -5991,9 +5991,9 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: @@ -6069,9 +6069,9 @@ define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: @@ -6147,9 +6147,9 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: @@ -6225,9 +6225,9 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: @@ -6303,9 +6303,9 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: 
retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: @@ -6381,9 +6381,9 @@ define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_psubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: @@ -6459,9 +6459,9 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpckhbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] +; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: @@ -6537,9 +6537,9 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpckhdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: @@ -6615,9 +6615,9 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpckhwd: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: @@ -6693,9 +6693,9 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpcklbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: @@ -6771,9 +6771,9 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpckldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: @@ -6849,9 +6849,9 @@ define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_punpcklwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = 
mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: @@ -6927,9 +6927,9 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BROADWELL-LABEL: test_pxor: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [1:0.50] +; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] ; BROADWELL-NEXT: movd %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/movbe-schedule.ll b/llvm/test/CodeGen/X86/movbe-schedule.ll index 2ca27148ffca..868a5c6080ec 100644 --- a/llvm/test/CodeGen/X86/movbe-schedule.ll +++ b/llvm/test/CodeGen/X86/movbe-schedule.ll @@ -40,9 +40,9 @@ define i16 @test_movbe_i16(i16 *%a0, i16 %a1, i16 *%a2) { ; ; BROADWELL-LABEL: test_movbe_i16: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [1:0.50] -; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50] +; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movbe_i16: ; SKYLAKE: # BB#0: @@ -100,9 +100,9 @@ define i32 @test_movbe_i32(i32 *%a0, i32 %a1, i32 *%a2) { ; ; BROADWELL-LABEL: test_movbe_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [6:0.50] +; 
BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movbe_i32: ; SKYLAKE: # BB#0: @@ -160,9 +160,9 @@ define i64 @test_movbe_i64(i64 *%a0, i64 %a1, i64 *%a2) { ; ; BROADWELL-LABEL: test_movbe_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] +; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movbe_i64: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/popcnt-schedule.ll b/llvm/test/CodeGen/X86/popcnt-schedule.ll index 5d5190625667..9b35da059f29 100644 --- a/llvm/test/CodeGen/X86/popcnt-schedule.ll +++ b/llvm/test/CodeGen/X86/popcnt-schedule.ll @@ -46,11 +46,11 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) { ; ; BROADWELL-LABEL: test_ctpop_i16: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [3:1.00] +; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] ; BROADWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: # kill: %AX %AX %EAX -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctpop_i16: ; SKYLAKE: # BB#0: @@ -114,10 +114,10 @@ define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) { ; ; BROADWELL-LABEL: test_ctpop_i32: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [3:1.00] +; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] ; BROADWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] ; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctpop_i32: ; SKYLAKE: # BB#0: @@ -178,10 +178,10 @@ define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) { ; ; BROADWELL-LABEL: 
test_ctpop_i64: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [3:1.00] +; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] ; BROADWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] ; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ctpop_i64: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 844fb7a87f62..20e022ac62c6 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -45,8 +45,8 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_addps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # BB#0: @@ -111,8 +111,8 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; BROADWELL-LABEL: test_addss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addss: ; SKYLAKE: # BB#0: @@ -181,8 +181,8 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_andps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_andps: ; SKYLAKE: # BB#0: @@ -255,8 +255,8 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # BB#0: @@ -332,9 +332,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_cmpps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # BB#0: @@ -407,8 +407,8 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; BROADWELL-LABEL: test_cmpss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmpss: ; SKYLAKE: # BB#0: @@ -521,13 +521,13 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] ; 
BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_comiss: ; SKYLAKE: # BB#0: @@ -631,9 +631,9 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; BROADWELL-LABEL: test_cvtsi2ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsi2ss: ; SKYLAKE: # BB#0: @@ -708,9 +708,9 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; BROADWELL-LABEL: test_cvtsi2ssq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsi2ssq: ; SKYLAKE: # BB#0: @@ -785,9 +785,9 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; BROADWELL-LABEL: test_cvtss2si: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtss2si: ; SKYLAKE: # BB#0: @@ -865,9 +865,9 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; BROADWELL-LABEL: test_cvtss2siq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtss2si %xmm0, 
%rcx # sched: [4:1.00] -; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtss2siq: ; SKYLAKE: # BB#0: @@ -945,9 +945,9 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; BROADWELL-LABEL: test_cvttss2si: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttss2si: ; SKYLAKE: # BB#0: @@ -1022,9 +1022,9 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; BROADWELL-LABEL: test_cvttss2siq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttss2siq: ; SKYLAKE: # BB#0: @@ -1093,9 +1093,9 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; BROADWELL-LABEL: test_divps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] -; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # BB#0: @@ -1159,9 +1159,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; ; BROADWELL-LABEL: 
test_divss: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] -; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divss: ; SKYLAKE: # BB#0: @@ -1226,8 +1226,8 @@ define void @test_ldmxcsr(i32 %a0) { ; BROADWELL-LABEL: test_ldmxcsr: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ldmxcsr: ; SKYLAKE: # BB#0: @@ -1294,8 +1294,8 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_maxps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # BB#0: @@ -1361,8 +1361,8 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_maxss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxss: ; SKYLAKE: # BB#0: @@ -1428,8 +1428,8 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_minps: ; BROADWELL: # BB#0: ; 
BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # BB#0: @@ -1495,8 +1495,8 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_minss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minss: ; SKYLAKE: # BB#0: @@ -1566,10 +1566,10 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; ; BROADWELL-LABEL: test_movaps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # BB#0: @@ -1641,7 +1641,7 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; BROADWELL-LABEL: test_movhlps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movhlps: ; SKYLAKE: # BB#0: @@ -1708,10 +1708,10 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; BROADWELL-LABEL: test_movhps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: 
vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movhps: ; SKYLAKE: # BB#0: @@ -1787,7 +1787,7 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movlhps: ; SKYLAKE: # BB#0: @@ -1855,10 +1855,10 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; BROADWELL-LABEL: test_movlps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movlps: ; SKYLAKE: # BB#0: @@ -1928,7 +1928,7 @@ define i32 @test_movmskps(<4 x float> %a0) { ; BROADWELL-LABEL: test_movmskps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # BB#0: @@ -1989,7 +1989,7 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_movntps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # BB#0: @@ -2052,10 +2052,10 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; ; BROADWELL-LABEL: 
test_movss_mem: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movss_mem: ; SKYLAKE: # BB#0: @@ -2125,7 +2125,7 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; BROADWELL-LABEL: test_movss_reg: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movss_reg: ; SKYLAKE: # BB#0: @@ -2188,10 +2188,10 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; ; BROADWELL-LABEL: test_movups: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # BB#0: @@ -2259,9 +2259,9 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; BROADWELL-LABEL: test_mulps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # BB#0: @@ -2325,9 +2325,9 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; ; BROADWELL-LABEL: 
test_mulss: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulss: ; SKYLAKE: # BB#0: @@ -2396,8 +2396,8 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; BROADWELL-LABEL: test_orps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # BB#0: @@ -2466,8 +2466,8 @@ define void @test_prefetchnta(i8* %a0) { ; ; BROADWELL-LABEL: test_prefetchnta: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_prefetchnta: ; SKYLAKE: # BB#0: @@ -2534,9 +2534,9 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_rcpps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # BB#0: @@ -2619,10 +2619,10 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; BROADWELL-LABEL: test_rcpss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; 
BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rcpss: ; SKYLAKE: # BB#0: @@ -2706,9 +2706,9 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_rsqrtps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00] +; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # BB#0: @@ -2791,10 +2791,10 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; BROADWELL-LABEL: test_rsqrtss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_rsqrtss: ; SKYLAKE: # BB#0: @@ -2871,8 +2871,8 @@ define void @test_sfence() { ; ; BROADWELL-LABEL: test_sfence: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: sfence # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: sfence # sched: [2:0.33] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sfence: ; SKYLAKE: # BB#0: @@ -2936,8 +2936,8 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; 
BROADWELL-LABEL: test_shufps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # BB#0: @@ -3008,9 +3008,9 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_sqrtps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00] +; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # BB#0: @@ -3093,10 +3093,10 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_sqrtss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] -; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] ; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sqrtss: ; SKYLAKE: # BB#0: @@ -3170,9 +3170,9 @@ define i32 @test_stmxcsr() { ; ; BROADWELL-LABEL: test_stmxcsr: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_stmxcsr: ; SKYLAKE: # BB#0: @@ -3239,8 +3239,8 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_subps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # BB#0: @@ -3305,8 +3305,8 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; BROADWELL-LABEL: test_subss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_subss: ; SKYLAKE: # BB#0: @@ -3414,13 +3414,13 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ucomiss: ; SKYLAKE: # BB#0: @@ -3523,8 +3523,8 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-LABEL: test_unpckhps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; 
BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # BB#0: @@ -3593,8 +3593,8 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-LABEL: test_unpcklps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # BB#0: @@ -3663,8 +3663,8 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-LABEL: test_xorps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 0c1f8d9952fa..8f645aea818f 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -45,8 +45,8 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_addpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq 
# sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # BB#0: @@ -111,8 +111,8 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; BROADWELL-LABEL: test_addsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addsd: ; SKYLAKE: # BB#0: @@ -182,9 +182,9 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_andpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # BB#0: @@ -263,9 +263,9 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # BB#0: @@ -346,9 +346,9 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_cmppd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: 
vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # BB#0: @@ -421,8 +421,8 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; BROADWELL-LABEL: test_cmpsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cmpsd: ; SKYLAKE: # BB#0: @@ -535,13 +535,13 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_comisd: ; SKYLAKE: # BB#0: @@ -645,9 +645,9 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00] +; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # BB#0: @@ -725,9 +725,9 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { 
; BROADWELL-LABEL: test_cvtdq2ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # BB#0: @@ -803,9 +803,9 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_cvtpd2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # BB#0: @@ -882,9 +882,9 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_cvtpd2ps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # BB#0: @@ -961,9 +961,9 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_cvtps2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_cvtps2dq: ; SKYLAKE: # BB#0: @@ -1040,9 +1040,9 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_cvtps2pd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtps2pd: ; SKYLAKE: # BB#0: @@ -1119,9 +1119,9 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; BROADWELL-LABEL: test_cvtsd2si: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsd2si: ; SKYLAKE: # BB#0: @@ -1199,9 +1199,9 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; BROADWELL-LABEL: test_cvtsd2siq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsd2siq: ; SKYLAKE: # BB#0: @@ -1285,10 +1285,10 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; BROADWELL-LABEL: test_cvtsd2ss: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] ; BROADWELL-NEXT: vaddss %xmm1, 
%xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsd2ss: ; SKYLAKE: # BB#0: @@ -1367,9 +1367,9 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; BROADWELL-LABEL: test_cvtsi2sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsi2sd: ; SKYLAKE: # BB#0: @@ -1444,9 +1444,9 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; BROADWELL-LABEL: test_cvtsi2sdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtsi2sdq: ; SKYLAKE: # BB#0: @@ -1529,10 +1529,10 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; BROADWELL-LABEL: test_cvtss2sd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvtss2sd: ; SKYLAKE: # BB#0: @@ -1612,9 +1612,9 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_cvttpd2dq: ; BROADWELL: 
# BB#0: ; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttpd2dq: ; SKYLAKE: # BB#0: @@ -1692,9 +1692,9 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_cvttps2dq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00] +; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttps2dq: ; SKYLAKE: # BB#0: @@ -1769,9 +1769,9 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; BROADWELL-LABEL: test_cvttsd2si: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttsd2si: ; SKYLAKE: # BB#0: @@ -1846,9 +1846,9 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; BROADWELL-LABEL: test_cvttsd2siq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [4:1.00] +; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_cvttsd2siq: ; SKYLAKE: # BB#0: @@ -1917,9 +1917,9 @@ define <2 x double> @test_divpd(<2 x 
double> %a0, <2 x double> %a1, <2 x double> ; ; BROADWELL-LABEL: test_divpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] -; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # BB#0: @@ -1983,9 +1983,9 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; ; BROADWELL-LABEL: test_divsd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] -; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_divsd: ; SKYLAKE: # BB#0: @@ -2051,7 +2051,7 @@ define void @test_lfence() { ; BROADWELL-LABEL: test_lfence: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: lfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lfence: ; SKYLAKE: # BB#0: @@ -2112,7 +2112,7 @@ define void @test_mfence() { ; BROADWELL-LABEL: test_mfence: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: mfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mfence: ; SKYLAKE: # BB#0: @@ -2171,7 +2171,7 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; BROADWELL-LABEL: test_maskmovdqu: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maskmovdqu: ; SKYLAKE: # BB#0: @@ -2231,8 +2231,8 @@ define <2 x double> 
@test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_maxpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # BB#0: @@ -2298,8 +2298,8 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_maxsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_maxsd: ; SKYLAKE: # BB#0: @@ -2365,8 +2365,8 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_minpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # BB#0: @@ -2432,8 +2432,8 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_minsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_minsd: ; SKYLAKE: # BB#0: @@ -2503,10 +2503,10 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; ; BROADWELL-LABEL: 
test_movapd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # BB#0: @@ -2579,10 +2579,10 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; BROADWELL-LABEL: test_movdqa: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movdqa: ; SKYLAKE: # BB#0: @@ -2655,10 +2655,10 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; BROADWELL-LABEL: test_movdqu: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movdqu: ; SKYLAKE: # BB#0: @@ -2747,12 +2747,12 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_movd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] ; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # 
sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # BB#0: @@ -2858,12 +2858,12 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: test_movd_64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] ; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movd_64: ; SKYLAKE: # BB#0: @@ -2953,10 +2953,10 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; BROADWELL-LABEL: test_movhpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # BB#0: @@ -3032,10 +3032,10 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; BROADWELL-LABEL: test_movlpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # BB#0: @@ -3104,7 +3104,7 @@ 
define i32 @test_movmskpd(<2 x double> %a0) { ; BROADWELL-LABEL: test_movmskpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # BB#0: @@ -3167,7 +3167,7 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: @@ -3232,7 +3232,7 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # BB#0: @@ -3300,10 +3300,10 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; ; BROADWELL-LABEL: test_movq_mem: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movq_mem: ; SKYLAKE: # BB#0: @@ -3377,7 +3377,7 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movq_reg: ; SKYLAKE: # BB#0: @@ -3445,10 +3445,10 @@ define void @test_movsd_mem(double* %a0, 
double* %a1) { ; ; BROADWELL-LABEL: test_movsd_mem: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movsd_mem: ; SKYLAKE: # BB#0: @@ -3519,7 +3519,7 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; BROADWELL-LABEL: test_movsd_reg: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # BB#0: @@ -3582,10 +3582,10 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; ; BROADWELL-LABEL: test_movupd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # BB#0: @@ -3653,9 +3653,9 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; BROADWELL-LABEL: test_mulpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # BB#0: @@ -3719,9 +3719,9 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; ; 
BROADWELL-LABEL: test_mulsd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mulsd: ; SKYLAKE: # BB#0: @@ -3791,9 +3791,9 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_orpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_orpd: ; SKYLAKE: # BB#0: @@ -3871,8 +3871,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_packssdw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # BB#0: @@ -3943,8 +3943,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_packsswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # BB#0: @@ -4015,8 +4015,8 @@ 
define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_packuswb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # BB#0: @@ -4087,8 +4087,8 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_paddb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # BB#0: @@ -4157,8 +4157,8 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_paddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # BB#0: @@ -4223,8 +4223,8 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_paddq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # BB#0: @@ -4293,8 +4293,8 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> 
*%a2) { ; BROADWELL-LABEL: test_paddsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # BB#0: @@ -4364,8 +4364,8 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_paddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # BB#0: @@ -4435,8 +4435,8 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_paddusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # BB#0: @@ -4506,8 +4506,8 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_paddusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # BB#0: @@ -4577,8 +4577,8 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_paddw: ; BROADWELL: 
# BB#0: ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # BB#0: @@ -4648,9 +4648,9 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_pand: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # BB#0: @@ -4731,9 +4731,9 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_pandn: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # BB#0: @@ -4809,8 +4809,8 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pavgb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # BB#0: @@ -4889,8 +4889,8 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; 
BROADWELL-LABEL: test_pavgw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # BB#0: @@ -4972,9 +4972,9 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pcmpeqb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # BB#0: @@ -5053,9 +5053,9 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pcmpeqd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # BB#0: @@ -5134,9 +5134,9 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pcmpeqw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_pcmpeqw: ; SKYLAKE: # BB#0: @@ -5216,9 +5216,9 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pcmpgtb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # BB#0: @@ -5298,9 +5298,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pcmpgtd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # BB#0: @@ -5380,9 +5380,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pcmpgtw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # BB#0: @@ -5455,7 +5455,7 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: # kill: %AX %AX %EAX -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # 
BB#0: @@ -5522,8 +5522,8 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; BROADWELL-LABEL: test_pinsrw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # BB#0: @@ -5596,8 +5596,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmaddwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # BB#0: @@ -5668,8 +5668,8 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmaxsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # BB#0: @@ -5739,8 +5739,8 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pmaxub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # BB#0: @@ -5810,8 +5810,8 @@ define <8 x i16> 
@test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pminsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # BB#0: @@ -5881,8 +5881,8 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pminub: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # BB#0: @@ -5945,7 +5945,7 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; BROADWELL-LABEL: test_pmovmskb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # BB#0: @@ -6005,8 +6005,8 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmulhuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # BB#0: @@ -6072,8 +6072,8 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmulhw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 
# sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # BB#0: @@ -6139,8 +6139,8 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmullw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # BB#0: @@ -6213,8 +6213,8 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pmuludq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # BB#0: @@ -6286,9 +6286,9 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_por: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # BB#0: @@ -6366,8 +6366,8 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_psadbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: 
[5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # BB#0: @@ -6441,9 +6441,9 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_pshufd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # BB#0: @@ -6520,9 +6520,9 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pshufhw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # BB#0: @@ -6599,9 +6599,9 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pshuflw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # BB#0: @@ -6676,9 +6676,9 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x 
i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pslld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # BB#0: @@ -6751,7 +6751,7 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; BROADWELL-LABEL: test_pslldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # BB#0: @@ -6815,9 +6815,9 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psllq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # BB#0: @@ -6894,9 +6894,9 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psllw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # BB#0: @@ -6973,9 +6973,9 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x 
i32> *%a2) { ; BROADWELL-LABEL: test_psrad: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # BB#0: @@ -7052,9 +7052,9 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psraw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # BB#0: @@ -7131,9 +7131,9 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psrld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # BB#0: @@ -7206,7 +7206,7 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; BROADWELL-LABEL: test_psrldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # BB#0: @@ -7270,9 +7270,9 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; 
BROADWELL-LABEL: test_psrlq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # BB#0: @@ -7349,9 +7349,9 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psrlw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # BB#0: @@ -7427,8 +7427,8 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_psubb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # BB#0: @@ -7497,8 +7497,8 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # BB#0: @@ -7563,8 +7563,8 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 
x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_psubq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # BB#0: @@ -7633,8 +7633,8 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_psubsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # BB#0: @@ -7704,8 +7704,8 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # BB#0: @@ -7775,8 +7775,8 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_psubusb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # BB#0: @@ -7846,8 +7846,8 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: 
test_psubusw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # BB#0: @@ -7917,8 +7917,8 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # BB#0: @@ -7987,8 +7987,8 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_punpckhbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # BB#0: @@ -8060,9 +8060,9 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_punpckhdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; 
BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # BB#0: @@ -8137,9 +8137,9 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-LABEL: test_punpckhqdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # BB#0: @@ -8213,8 +8213,8 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_punpckhwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # BB#0: @@ -8283,8 +8283,8 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_punpcklbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; 
BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # BB#0: @@ -8356,9 +8356,9 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_punpckldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # BB#0: @@ -8433,9 +8433,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-LABEL: test_punpcklqdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # BB#0: @@ -8509,8 +8509,8 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_punpcklwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 
sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # BB#0: @@ -8580,9 +8580,9 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_pxor: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # BB#0: @@ -8657,9 +8657,9 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BROADWELL-LABEL: test_shufpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # BB#0: @@ -8735,9 +8735,9 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] -; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] +; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_sqrtpd: ; SKYLAKE: # BB#0: @@ -8820,10 +8820,10 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_sqrtsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] -; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] ; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_sqrtsd: ; SKYLAKE: # BB#0: @@ -8898,8 +8898,8 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_subpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # BB#0: @@ -8964,8 +8964,8 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; BROADWELL-LABEL: test_subsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_subsd: ; SKYLAKE: # BB#0: @@ -9073,13 +9073,13 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] ; 
BROADWELL-NEXT: sete %dl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ucomisd: ; SKYLAKE: # BB#0: @@ -9183,9 +9183,9 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-LABEL: test_unpckhpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # BB#0: @@ -9266,9 +9266,9 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-LABEL: test_unpcklpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # BB#0: @@ -9343,9 +9343,9 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_xorpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; 
; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index 8cfc95feea75..2a3dae1b64ea 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -45,8 +45,8 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-LABEL: test_addsubpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # BB#0: @@ -112,8 +112,8 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-LABEL: test_addsubps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # BB#0: @@ -179,8 +179,8 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BROADWELL-LABEL: test_haddpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # BB#0: @@ -246,8 +246,8 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; BROADWELL-LABEL: test_haddps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddps (%rdi), 
%xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # BB#0: @@ -313,8 +313,8 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BROADWELL-LABEL: test_hsubpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # BB#0: @@ -380,8 +380,8 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; BROADWELL-LABEL: test_hsubps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # BB#0: @@ -443,8 +443,8 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; ; BROADWELL-LABEL: test_lddqu: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # BB#0: @@ -511,7 +511,7 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: monitor # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # BB#0: @@ -585,9 +585,9 @@ define <2 x double> @test_movddup(<2 x double> 
%a0, <2 x double> *%a1) { ; BROADWELL-LABEL: test_movddup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] +; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # BB#0: @@ -663,9 +663,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_movshdup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] +; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # BB#0: @@ -741,9 +741,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-LABEL: test_movsldup: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] +; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # BB#0: @@ -819,8 +819,8 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: mwait # sched: [20:2.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: mwait # sched: [100:0.25] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index c6986f29b4b9..bdcefe8fe2eb 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -43,8 +43,8 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # BB#0: @@ -109,8 +109,8 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BROADWELL-LABEL: test_blendps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # BB#0: @@ -175,8 +175,8 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-LABEL: test_blendvpd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # BB#0: @@ -242,8 +242,8 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-LABEL: 
test_blendvps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # BB#0: @@ -303,8 +303,8 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-LABEL: test_dppd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_dppd: ; SKYLAKE: # BB#0: @@ -364,8 +364,8 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; BROADWELL-LABEL: test_dpps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # BB#0: @@ -425,8 +425,8 @@ define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { ; BROADWELL-LABEL: test_extractps: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_extractps: ; SKYLAKE: # BB#0: @@ -487,8 +487,8 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; BROADWELL-LABEL: test_insertps: ; BROADWELL: # BB#0: ; 
BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_insertps: ; SKYLAKE: # BB#0: @@ -543,8 +543,8 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # BB#0: @@ -598,8 +598,8 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_mpsadbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # BB#0: @@ -660,8 +660,8 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_packusdw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # BB#0: @@ -728,8 +728,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; BROADWELL-LABEL: test_pblendvb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: 
vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # BB#0: @@ -789,8 +789,8 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pblendw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # BB#0: @@ -849,8 +849,8 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_pcmpeqq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # BB#0: @@ -913,8 +913,8 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; BROADWELL-LABEL: test_pextrb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrb: ; SKYLAKE: # BB#0: @@ -979,8 +979,8 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpextrd $3, %xmm0, 
%eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # BB#0: @@ -1044,8 +1044,8 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; BROADWELL-LABEL: test_pextrq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrq: ; SKYLAKE: # BB#0: @@ -1104,8 +1104,8 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # BB#0: @@ -1164,9 +1164,9 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; ; BROADWELL-LABEL: test_phminposuw: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phminposuw: ; SKYLAKE: # BB#0: @@ -1226,8 +1226,8 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; BROADWELL-LABEL: test_pinsrb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpinsrb 
$3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pinsrb: ; SKYLAKE: # BB#0: @@ -1286,8 +1286,8 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: test_pinsrd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pinsrd: ; SKYLAKE: # BB#0: @@ -1350,9 +1350,9 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; BROADWELL-LABEL: test_pinsrq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pinsrq: ; SKYLAKE: # BB#0: @@ -1416,8 +1416,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pmaxsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # BB#0: @@ -1477,8 +1477,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pmaxsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; 
BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # BB#0: @@ -1538,8 +1538,8 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pmaxud: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # BB#0: @@ -1599,8 +1599,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmaxuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # BB#0: @@ -1660,8 +1660,8 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pminsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # BB#0: @@ -1721,8 +1721,8 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pminsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 
+; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # BB#0: @@ -1782,8 +1782,8 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pminud: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # BB#0: @@ -1843,8 +1843,8 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pminuw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # BB#0: @@ -1909,9 +1909,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # BB#0: @@ -1981,9 +1981,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # 
sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # BB#0: @@ -2053,9 +2053,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovsxbq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # BB#0: @@ -2125,9 +2125,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-LABEL: test_pmovsxdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # BB#0: @@ -2197,9 +2197,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovsxwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # BB#0: @@ -2269,9 +2269,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovsxwq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] +; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: 
[6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # BB#0: @@ -2341,9 +2341,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # BB#0: @@ -2413,9 +2413,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # BB#0: @@ -2485,9 +2485,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-LABEL: test_pmovzxbq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} 
xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # BB#0: @@ -2557,9 +2557,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-LABEL: test_pmovzxdq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # BB#0: @@ -2629,9 +2629,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovzxwd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # BB#0: @@ -2701,9 +2701,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-LABEL: test_pmovzxwq: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # BB#0: @@ -2768,8 +2768,8 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pmuldq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # BB#0: @@ -2830,8 +2830,8 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_pmulld: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # BB#0: @@ -2907,11 +2907,11 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] ; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] +; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [7:1.00] ; BROADWELL-NEXT: setb %cl # sched: [1:0.50] ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; 
BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_ptest: ; SKYLAKE: # BB#0: @@ -2992,10 +2992,10 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25] -; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00] ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # BB#0: @@ -3064,10 +3064,10 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:1.25] -; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:2.00] +; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00] ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # BB#0: @@ -3137,10 +3137,10 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; ; BROADWELL-LABEL: test_roundsd: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] -; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] +; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundsd: ; SKYLAKE: # BB#0: @@ -3210,10 
+3210,10 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; ; BROADWELL-LABEL: test_roundss: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25] -; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] +; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_roundss: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse42-schedule.ll b/llvm/test/CodeGen/X86/sse42-schedule.ll index e900234299dd..419395c793df 100644 --- a/llvm/test/CodeGen/X86/sse42-schedule.ll +++ b/llvm/test/CodeGen/X86/sse42-schedule.ll @@ -42,9 +42,9 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; BROADWELL-LABEL: crc32_32_8: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # BB#0: @@ -112,9 +112,9 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; BROADWELL-LABEL: crc32_32_16: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # BB#0: @@ -182,9 +182,9 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-LABEL: crc32_32_32: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] -; 
BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # BB#0: @@ -252,9 +252,9 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; BROADWELL-LABEL: crc32_64_8: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; BROADWELL-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # BB#0: @@ -322,9 +322,9 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-LABEL: crc32_64_64: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] +; BROADWELL-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # BB#0: @@ -421,10 +421,10 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] ; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00] ; BROADWELL-NEXT: # kill: %ECX %ECX %RCX ; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpestri: ; SKYLAKE: # BB#0: @@ -533,8 +533,8 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; 
BROADWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] ; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpestrm: ; SKYLAKE: # BB#0: @@ -623,10 +623,10 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] ; BROADWELL-NEXT: movl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00] +; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] ; BROADWELL-NEXT: # kill: %ECX %ECX %RCX ; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpistri: ; SKYLAKE: # BB#0: @@ -699,8 +699,8 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pcmpistrm: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pcmpistrm: ; SKYLAKE: # BB#0: @@ -760,8 +760,8 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-LABEL: test_pcmpgtq: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: 
test_pcmpgtq: ; SKYLAKE: # BB#0: @@ -823,9 +823,9 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; BROADWELL-LABEL: test_pclmulqdq: ; BROADWELL: # BB#0: -; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00] -; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pclmulqdq: ; SKYLAKE: # BB#0: diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index 7772a880ba8e..4ac10745e878 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -51,9 +51,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-LABEL: test_pabsb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # BB#0: @@ -130,9 +130,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-LABEL: test_pabsd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # BB#0: @@ -209,9 +209,9 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-LABEL: test_pabsw: ; BROADWELL: # BB#0: ; 
BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] +; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:0.50] ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # BB#0: @@ -287,8 +287,8 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_palignr: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # BB#0: @@ -353,8 +353,8 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_phaddd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # BB#0: @@ -420,8 +420,8 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_phaddsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # BB#0: @@ -487,8 +487,8 @@ define <8 x i16> 
@test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_phaddw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # BB#0: @@ -554,8 +554,8 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_phsubd: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # BB#0: @@ -621,8 +621,8 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_phsubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # BB#0: @@ -688,8 +688,8 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_phsubw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # BB#0: @@ -755,8 +755,8 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; 
BROADWELL-LABEL: test_pmaddubsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # BB#0: @@ -823,8 +823,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_pmulhrsw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # BB#0: @@ -890,8 +890,8 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_pshufb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # BB#0: @@ -961,8 +961,8 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-LABEL: test_psignb: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # BB#0: @@ -1032,8 +1032,8 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-LABEL: test_psignd: ; 
BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # BB#0: @@ -1103,8 +1103,8 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-LABEL: test_psignw: ; BROADWELL: # BB#0: ; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [2:1.00] +; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # BB#0: