From ae11aed9d7b380ad3af49eefe7e0c0161dd294a6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 14 Jan 2014 07:41:20 +0000 Subject: [PATCH] Separate the concept of 16-bit/32-bit operand size controlled by 0x66 prefix and the current mode from the concept of SSE instructions using 0x66 prefix as part of their encoding without being affected by the mode. This should allow SSE instructions to be encoded correctly in 16-bit mode which r198586 probably broke. llvm-svn: 199193 --- .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 10 + .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 19 + llvm/lib/Target/X86/X86CodeEmitter.cpp | 19 + llvm/lib/Target/X86/X86InstrAVX512.td | 80 ++-- llvm/lib/Target/X86/X86InstrArithmetic.td | 8 +- llvm/lib/Target/X86/X86InstrFormats.td | 127 +++---- llvm/lib/Target/X86/X86InstrMMX.td | 6 +- llvm/lib/Target/X86/X86InstrSSE.td | 351 ++++++++---------- llvm/lib/Target/X86/X86InstrShiftRotate.td | 4 +- llvm/lib/Target/X86/X86InstrSystem.td | 4 +- llvm/lib/Target/X86/X86InstrVMX.td | 10 +- llvm/utils/TableGen/X86RecognizableInstr.cpp | 53 ++- 12 files changed, 361 insertions(+), 330 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 5b1cf5a1a58b..404a7e80e6ce 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -355,6 +355,16 @@ namespace X86II { // XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions. XOPA = 22 << Op0Shift, + // PD - Prefix code for packed double precision vector floating point + // operations performed in the SSE registers. + PD = 23 << Op0Shift, + + // T8PD - Prefix before and after 0x0F. Combination of T8 and PD. + T8PD = 24 << Op0Shift, + + // TAPD - Prefix before and after 0x0F. Combination of TA and PD. + TAPD = 25 << Op0Shift, + //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. 
// They are used to specify GPRs and SSE registers, 64-bit operand size, diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 48bd6f193517..bd3c00626d60 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -717,6 +717,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TA: // 0F 3A VEX_5M = 0x3; break; + case X86II::T8PD: // 66 0F 38 + VEX_PP = 0x1; + VEX_5M = 0x2; + break; case X86II::T8XS: // F3 0F 38 VEX_PP = 0x2; VEX_5M = 0x2; @@ -725,10 +729,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAPD: // 66 0F 3A + VEX_PP = 0x1; + VEX_5M = 0x3; + break; case X86II::TAXD: // F2 0F 3A VEX_PP = 0x3; VEX_5M = 0x3; break; + case X86II::PD: // 66 0F + VEX_PP = 0x1; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; @@ -1215,6 +1226,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::A7: // 0F A7 Need0FPrefix = true; break; + case X86II::PD: // 66 0F + case X86II::T8PD: // 66 0F 38 + case X86II::TAPD: // 66 0F 3A + EmitByte(0x66, CurByte, OS); + Need0FPrefix = true; + break; case X86II::XS: // F3 0F case X86II::T8XS: // F3 0F 38 EmitByte(0xF3, CurByte, OS); @@ -1252,11 +1269,13 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // FIXME: Pull this up into previous switch if REX can be moved earlier. 
switch (TSFlags & X86II::Op0Mask) { + case X86II::T8PD: // 66 0F 38 case X86II::T8XS: // F3 0F 38 case X86II::T8XD: // F2 0F 38 case X86II::T8: // 0F 38 EmitByte(0x38, CurByte, OS); break; + case X86II::TAPD: // 66 0F 3A case X86II::TAXD: // F2 0F 3A case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); diff --git a/llvm/lib/Target/X86/X86CodeEmitter.cpp b/llvm/lib/Target/X86/X86CodeEmitter.cpp index 072996679bc8..5dba4ecbfdb6 100644 --- a/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -696,6 +696,12 @@ void Emitter::emitOpcodePrefix(uint64_t TSFlags, Need0FPrefix = true; break; case X86II::REP: break; // already handled. + case X86II::PD: // 66 0F + case X86II::T8PD: // 66 0F 38 + case X86II::TAPD: // 66 0F 3A + MCE.emitByte(0x66); + Need0FPrefix = true; + break; case X86II::T8XS: // F3 0F 38 case X86II::XS: // F3 0F MCE.emitByte(0xF3); @@ -728,11 +734,13 @@ void Emitter::emitOpcodePrefix(uint64_t TSFlags, MCE.emitByte(0x0F); switch (Desc->TSFlags & X86II::Op0Mask) { + case X86II::T8PD: // 66 0F 38 case X86II::T8XD: // F2 0F 38 case X86II::T8XS: // F3 0F 38 case X86II::T8: // 0F 38 MCE.emitByte(0x38); break; + case X86II::TAPD: // 66 0F 3A case X86II::TAXD: // F2 0F 38 case X86II::TA: // 0F 3A MCE.emitByte(0x3A); @@ -882,6 +890,10 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, case X86II::TA: // 0F 3A VEX_5M = 0x3; break; + case X86II::T8PD: // 66 0F 38 + VEX_PP = 0x1; + VEX_5M = 0x2; + break; case X86II::T8XS: // F3 0F 38 VEX_PP = 0x2; VEX_5M = 0x2; break; @@ -890,10 +902,17 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAPD: // 66 0F 3A + VEX_PP = 0x1; + VEX_5M = 0x3; + break; case X86II::TAXD: // F2 0F 3A VEX_PP = 0x3; VEX_5M = 0x3; break; + case X86II::PD: // 66 0F + VEX_PP = 0x1; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c1c3c3cff2de..a5d7ed0b6be4 100644 
--- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -744,12 +744,12 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem, memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512; defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem, - memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W; + memopv8i64, X86pcmpeqm, v8i64>, T8PD, EVEX_V512, VEX_W; defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem, memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512; defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem, - memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W; + memopv8i64, X86pcmpgtm, v8i64>, T8PD, EVEX_V512, VEX_W; def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (COPY_TO_REGCLASS (VPCMPGTDZrr @@ -843,7 +843,7 @@ multiclass avx512_cmp_packed, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCMPPDZ : avx512_cmp_packed, OpSize, EVEX_4V, VEX_W, EVEX_V512, + "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), @@ -1103,7 +1103,7 @@ multiclass avx512_mask_unpck opc, string OpcodeStr, multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { defm BW : avx512_mask_unpck, - VEX_4V, VEX_L, OpSize, TB; + VEX_4V, VEX_L, PD; } defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; @@ -1155,7 +1155,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, SDNode OpNode> { defm W : avx512_mask_shiftop, - VEX, OpSize, TA, VEX_W; + VEX, TAPD, VEX_W; } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -1228,14 +1228,14 @@ defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f3 EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVAPDZ : avx512_mov_packed<0x28, 
VR512, VK8WM, f512mem, alignedloadv8f64, "vmovapd", SSEPackedDouble>, - OpSize, EVEX_V512, VEX_W, + PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32, "vmovups", SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64, "vmovupd", SSEPackedDouble>, - OpSize, EVEX_V512, VEX_W, + PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), "vmovaps\t{$src, $dst|$dst, $src}", @@ -1245,7 +1245,7 @@ def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr "vmovapd\t{$src, $dst|$dst, $src}", [(alignedstore512 (v8f64 VR512:$src), addr:$dst)], SSEPackedDouble>, EVEX, EVEX_V512, - OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + PD, VEX_W, EVEX_CD8<64, CD8VF>; def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), "vmovups\t{$src, $dst|$dst, $src}", [(store (v16f32 VR512:$src), addr:$dst)], @@ -1254,7 +1254,7 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr "vmovupd\t{$src, $dst|$dst, $src}", [(store (v8f64 VR512:$src), addr:$dst)], SSEPackedDouble>, EVEX, EVEX_V512, - OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + PD, VEX_W, EVEX_CD8<64, CD8VF>; let hasSideEffects = 0 in { def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), @@ -1421,7 +1421,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W, Requires<[HasAVX512, In64BitMode]>; def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), @@ -1429,7 +1429,7 @@ def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, 
OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>, + EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; // Move Scalar Single to Double Int @@ -1770,7 +1770,7 @@ defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32, defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>, @@ -1781,7 +1781,7 @@ defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, - VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8, + VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, @@ -1800,31 +1800,31 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 
0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>, - T8, EVEX_V512, EVEX_CD8<32, CD8VF>; + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>, - T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1), (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))), @@ -1876,13 +1876,13 @@ defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + SSEPackedDouble>, PD, EVEX_V512, 
VEX_W, EVEX_CD8<64, CD8VF>; multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, @@ -1935,15 +1935,15 @@ multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, } defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32, - i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>; + i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedSingle in defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp, - memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512, + memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512, EVEX_CD8<32, CD8VF>; let ExeDomain = SSEPackedDouble in defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp, - memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512, + memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))), @@ -2038,7 +2038,7 @@ defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem, defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, @@ -2046,7 +2046,7 @@ defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem, defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, @@ -2060,11 +2060,11 @@ defm VMAXPSZ : 
avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem, defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle, @@ -2076,11 +2076,11 @@ defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem, defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>, - EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), @@ -2840,7 +2840,7 @@ let hasSideEffects = 0 in { defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, memopv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, + SSEPackedSingle>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, @@ -2877,7 +2877,7 @@ defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, memopv8f64, f512mem, v8i32, 
v8f64, - SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, + SSEPackedDouble>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, @@ -2946,7 +2946,7 @@ let hasSideEffects = 0 in { } defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - memopv16f32, f512mem, SSEPackedSingle>, OpSize, + memopv16f32, f512mem, SSEPackedSingle>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, @@ -3019,14 +3019,14 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize, EVEX, + "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize, EVEX, + "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { @@ -3034,14 +3034,14 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { load, "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, EVEX, + load, "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, load, "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, EVEX, + load, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } @@ -3796,7 +3796,7 @@ multiclass avx512_shufp, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VSHUFPDZ : avx512_shufp, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + 
SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index ad2b00e14ff7..afa69507a74e 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1363,21 +1363,21 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_NONMEM>, T8, OpSize; + [], IIC_BIN_NONMEM>, T8PD; def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + [], IIC_BIN_NONMEM>, T8PD, REX_W, Requires<[In64BitMode]>; } // SchedRW let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_MEM>, T8, OpSize; + [], IIC_BIN_MEM>, T8PD; def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "adcx{q}\t{$src, $dst|$dst, $src}", - [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + [], IIC_BIN_MEM>, T8PD, REX_W, Requires<[In64BitMode]>; } } diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index c17815a4e6d5..2cc5339ef8a7 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -138,6 +138,9 @@ class TAXD { bits<5> Prefix = 19; } class XOP8 { bits<5> Prefix = 20; } class XOP9 { bits<5> Prefix = 21; } class XOPA { bits<5> Prefix = 22; } +class PD { bits<5> Prefix = 23; } +class T8PD { bits<5> Prefix = 24; } +class TAPD { bits<5> Prefix = 25; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit 
hasVEX_4VPrefix = 1; } @@ -340,6 +343,7 @@ class Iseg32 o, Format f, dag outs, dag ins, string asm, def __xs : XS; def __xd : XD; +def __pd : PD; // SI - SSE 1 & 2 scalar instructions class SI o, Format F, dag outs, dag ins, string asm, @@ -349,7 +353,7 @@ class SI o, Format F, dag outs, dag ins, string asm, !if(hasVEXPrefix /* VEX */, [UseAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], !if(!eq(Prefix, __xd.Prefix), [UseSSE2], - !if(hasOpSizePrefix, [UseSSE2], [UseSSE1]))))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -373,7 +377,7 @@ class PI o, Format F, dag outs, dag ins, string asm, list pattern, : I { let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -383,7 +387,7 @@ class PI o, Format F, dag outs, dag ins, string asm, list pattern, class MMXPI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin, Domain d> : I { - let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]); + let Predicates = !if(!eq(Prefix, __pd.Prefix), [HasSSE2], [HasSSE1]); } // PIi8 - SSE 1 & 2 packed instructions with immediate @@ -392,7 +396,7 @@ class PIi8 o, Format F, dag outs, dag ins, string asm, : Ii8 { let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512], !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]))); + !if(!eq(Prefix, __pd.Prefix), [UseSSE2], [UseSSE1]))); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -435,13 +439,13 @@ class VPSI o, Format F, dag outs, dag ins, string asm, // SDIi8 - SSE2 instructions 
with ImmT == Imm8 and XD prefix. // S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. -// PDI - SSE2 instructions with TB and OpSize prefixes, packed double domain. -// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. +// PDI - SSE2 instructions with PD prefix, packed double domain. +// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix. // VSDI - SSE2 scalar instructions with XD prefix in AVX form. -// VPDI - SSE2 vector instructions with TB and OpSize prefixes in AVX form, +// VPDI - SSE2 vector instructions with PD prefix in AVX form, // packed double domain. -// VS2I - SSE2 scalar instructions with TB and OpSize prefixes in AVX form. -// S2I - SSE2 scalar instructions with TB and OpSize prefixes. +// VS2I - SSE2 scalar instructions with PD prefix in AVX form. +// S2I - SSE2 scalar instructions with PD prefix. // MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as // MMX operands. // MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as @@ -461,11 +465,11 @@ class S2SIi8 o, Format F, dag outs, dag ins, string asm, : Ii8, XS, Requires<[UseSSE2]>; class PDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[UseSSE2]>; class PDIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TB, OpSize, + : Ii8, PD, Requires<[UseSSE2]>; class VSDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -477,16 +481,15 @@ class VS2SI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[HasAVX]>; + : I, + PD, Requires<[HasAVX]>; class VS2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - 
OpSize, Requires<[UseAVX]>; + : I, PD, + Requires<[UseAVX]>; class S2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[UseSSE2]>; + : I, PD, Requires<[UseSSE2]>; class MMXSDIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, XD, Requires<[HasSSE2]>; @@ -496,7 +499,7 @@ class MMXS2SIi8 o, Format F, dag outs, dag ins, string asm, // SSE3 Instruction Templates: // -// S3I - SSE3 instructions with TB and OpSize prefixes. +// S3I - SSE3 instructions with PD prefixes. // S3SI - SSE3 instructions with XS prefix. // S3DI - SSE3 instructions with XD prefix. @@ -510,7 +513,7 @@ class S3DI o, Format F, dag outs, dag ins, string asm, Requires<[UseSSE3]>; class S3I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[UseSSE3]>; @@ -527,11 +530,11 @@ class S3I o, Format F, dag outs, dag ins, string asm, class SS38I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSSE3]>; class SS3AI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSSE3]>; class MMXSS38I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -549,11 +552,11 @@ class MMXSS3AI o, Format F, dag outs, dag ins, string asm, // class SS48I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSE41]>; class SS4AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: @@ -561,7 +564,7 @@ class SS4AIi8 o, Format F, dag outs, dag ins, string asm, // SS428I - SSE 4.2 instructions with T8 prefix. 
class SS428I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, + : I, T8PD, Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. @@ -573,53 +576,53 @@ class SS42FI o, Format F, dag outs, dag ins, string asm, // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[UseSSE42]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) // -// AVX8I - AVX instructions with T8 and OpSize prefix. -// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. +// AVX8I - AVX instructions with T8PD prefix. +// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8. class AVX8I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX]>; class AVXAIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX]>; // AVX2 Instruction Templates: // Instructions introduced in AVX2 (no SSE equivalent forms) // -// AVX28I - AVX2 instructions with T8 and OpSize prefix. -// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. +// AVX28I - AVX2 instructions with T8PD prefix. +// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8. class AVX28I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX2]>; class AVX2AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX2]>; // AVX-512 Instruction Templates: // Instructions introduced in AVX-512 (no SSE equivalent forms) // -// AVX5128I - AVX-512 instructions with T8 and OpSize prefix. 
-// AVX512AIi8 - AVX-512 instructions with TA, OpSize prefix and ImmT = Imm8. -// AVX512PDI - AVX-512 instructions with TB, OpSize, double packed. +// AVX5128I - AVX-512 instructions with T8PD prefix. +// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8. +// AVX512PDI - AVX-512 instructions with PD, double packed. // AVX512PSI - AVX-512 instructions with TB, single packed. // AVX512XS8I - AVX-512 instructions with T8 and XS prefixes. // AVX512XSI - AVX-512 instructions with XS prefix, generic domain. -// AVX512BI - AVX-512 instructions with TB, OpSize, int packed domain. -// AVX512SI - AVX-512 scalar instructions with TB and OpSize prefixes. +// AVX512BI - AVX-512 instructions with PD, int packed domain. +// AVX512SI - AVX-512 scalar instructions with PD prefix. class AVX5128I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, T8, OpSize, + : I, T8PD, Requires<[HasAVX512]>; class AVX512XS8I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> @@ -635,28 +638,28 @@ class AVX512XDI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX512]>; class AVX512BI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[HasAVX512]>; class AVX512BIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TB, OpSize, + : Ii8, PD, Requires<[HasAVX512]>; class AVX512SI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, + : I, PD, Requires<[HasAVX512]>; class AVX512AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, OpSize, + : Ii8, TAPD, Requires<[HasAVX512]>; class AVX512Ii8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TB, - Requires<[HasAVX512]>; + Requires<[HasAVX512]>; 
class AVX512PDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, - OpSize, Requires<[HasAVX512]>; + : I, PD, + Requires<[HasAVX512]>; class AVX512PSI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, TB, @@ -669,8 +672,8 @@ class AVX512PI o, Format F, dag outs, dag ins, string asm, : I, TB, Requires<[HasAVX512]>; class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : I, T8, - OpSize, EVEX_4V, Requires<[HasAVX512]>; + : I, T8PD, + EVEX_4V, Requires<[HasAVX512]>; // AES Instruction Templates: // @@ -678,36 +681,36 @@ class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_AES> - : I, T8, + : I, T8PD, Requires<[HasAES]>; class AESAI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, + : Ii8, TAPD, Requires<[HasAES]>; // PCLMUL Instruction Templates class PCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, Requires<[HasPCLMUL]>; + : Ii8, TAPD, + Requires<[HasPCLMUL]>; class AVXPCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>; + : Ii8, TAPD, + VEX_4V, Requires<[HasAVX, HasPCLMUL]>; // FMA3 Instruction Templates class FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : I, T8, - OpSize, VEX_4V, FMASC, Requires<[HasFMA]>; + : I, T8PD, + VEX_4V, FMASC, Requires<[HasFMA]>; // FMA4 Instruction Templates class FMA4 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, VEX_I8IMM, FMASC, 
Requires<[HasFMA4]>; + : Ii8, TAPD, + VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP o, Format F, dag outs, dag ins, string asm, @@ -724,8 +727,8 @@ class IXOPi8 o, Format F, dag outs, dag ins, string asm, // XOP 5 operand instruction (VEX encoding!) class IXOP5 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> - : Ii8, TA, - OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; + : Ii8, TAPD, + VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; // X86-64 Instruction templates... // @@ -782,7 +785,7 @@ class VRS2I o, Format F, dag outs, dag ins, string asm, // MMXI - MMX instructions with TB prefix. // MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode. // MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode. -// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes. +// MMX2I - MMX / SSE2 instructions with PD prefix. // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXID - MMX instructions with XD prefix. 
@@ -801,7 +804,7 @@ class MMXRI o, Format F, dag outs, dag ins, string asm, : I, TB, REX_W, Requires<[HasMMX]>; class MMX2I o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, OpSize, Requires<[HasMMX]>; + : I, PD, Requires<[HasMMX]>; class MMXIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TB, Requires<[HasMMX]>; diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index ba58143e89ec..5126313de6db 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -527,16 +527,16 @@ defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, PD; defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, PD; defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, PD; let Constraints = "$src1 = $dst" in { defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, int_x86_sse_cvtpi2ps, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 5bc8f3330bcf..8cf08ad75f2f 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -815,38 +815,38 @@ defm VMOVAPS : sse12_mov_packed<0x28, VR128, 
f128mem, alignedloadv4f32, TB, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize, VEX; + PD, VEX; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize, VEX; + PD, VEX; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, TB, VEX, VEX_L; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize, VEX, VEX_L; + PD, VEX, VEX_L; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB, VEX, VEX_L; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize, VEX, VEX_L; + PD, VEX, VEX_L; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize; + PD; defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, TB; defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize; + PD; let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), @@ -1150,7 +1150,7 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize, + itin, SSEPackedDouble>, PD, Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -2393,47 +2393,47 @@ let Defs = [EFLAGS] 
in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, TB, VEX, VEX_LIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize, VEX, VEX_LIG; + "ucomisd">, PD, VEX, VEX_LIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, "comiss">, TB, VEX, VEX_LIG; defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize, VEX, VEX_LIG; + "comisd">, PD, VEX, VEX_LIG; } let isCodeGenOnly = 1 in { defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, load, "ucomiss">, TB, VEX; defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, VEX; + load, "ucomisd">, PD, VEX; defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, "comiss">, TB, VEX; defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, VEX; + load, "comisd">, PD, VEX; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, TB, OpSize; + "ucomisd">, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, "comiss">, TB; defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd">, TB, OpSize; + "comisd">, PD; } let isCodeGenOnly = 1 in { defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, load, "ucomiss">, TB; defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize; + load, "ucomisd">, PD; defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, "comiss">, TB; defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd">, TB, OpSize; + "comisd">, PD; } } // Defs = [EFLAGS] @@ -2472,7 +2472,7 @@ defm VCMPPS : sse12_cmp_packed, TB, OpSize, VEX_4V; + 
SSEPackedDouble>, PD, VEX_4V; defm VCMPPSY : sse12_cmp_packed, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed, TB, OpSize; + SSEPackedDouble, SSE_ALU_F64P>, PD; } let Predicates = [HasAVX] in { @@ -2555,20 +2555,18 @@ defm VSHUFPSY : sse12_shuffle, TB, VEX_4V, VEX_L; defm VSHUFPD : sse12_shuffle, TB, OpSize, VEX_4V; + loadv2f64, SSEPackedDouble>, PD, VEX_4V; defm VSHUFPDY : sse12_shuffle, TB, OpSize, VEX_4V, VEX_L; + loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, - TB; + memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, TB; defm SHUFPD : sse12_shuffle, - TB, OpSize; + memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, PD; } let Predicates = [HasAVX] in { @@ -2643,26 +2641,26 @@ defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, PD, VEX_4V; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, PD, VEX_4V; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; defm VUNPCKLPSY: 
sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2670,13 +2668,13 @@ let Constraints = "$src1 = $dst" in { SSEPackedSingle>, TB; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; + SSEPackedDouble>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", SSEPackedSingle>, TB; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; + SSEPackedDouble>, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -2734,14 +2732,13 @@ let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask, TB, VEX; defm VMOVMSKPD : sse12_extr_sign_mask, TB, - OpSize, VEX; + "movmskpd", SSEPackedDouble>, PD, VEX; defm VMOVMSKPSY : sse12_extr_sign_mask, TB, VEX, VEX_L; defm VMOVMSKPDY : sse12_extr_sign_mask, TB, - OpSize, VEX, VEX_L; + "movmskpd", SSEPackedDouble>, PD, + VEX, VEX_L; def : Pat<(i32 (X86fgetsign FR32:$src)), (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -2758,7 +2755,7 @@ let Predicates = [HasAVX] in { defm MOVMSKPS : sse12_extr_sign_mask, TB; defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; + SSEPackedDouble>, PD; def : Pat<(i32 (X86fgetsign FR32:$src)), (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>, @@ -2845,7 +2842,7 @@ multiclass sse12_fp_alias_pack_logical opc, string OpcodeStr, 
defm V#NAME#PD : sse12_fp_packed, - TB, OpSize, VEX_4V; + PD, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed opc, string OpcodeStr, defm PD : sse12_fp_packed, - TB, OpSize; + PD; } } @@ -2888,7 +2885,7 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, (bc_v4i64 (v4f64 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), (loadv4i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V, VEX_L; + PD, VEX_4V, VEX_L; // In AVX no need to add a pattern for 128-bit logical rr ps, because they // are all promoted to v2i64, and the patterns are covered by the int @@ -2905,7 +2902,7 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, (bc_v2i64 (v2f64 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V; + PD, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm opc, string OpcodeStr, [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (bc_v2i64 (v2f64 VR128:$src2))))], [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>, TB, OpSize; + (memopv2i64 addr:$src2)))]>, PD; } } @@ -2953,14 +2950,14 @@ multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SSEPackedSingle, itins.s, 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed, TB, OpSize, VEX_4V; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V; defm V#NAME#PSY : sse12_fp_packed, TB, VEX_4V, VEX_L; defm V#NAME#PDY : sse12_fp_packed, TB, OpSize, VEX_4V, VEX_L; + SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed opc, string OpcodeStr, itins.s>, TB; defm PD : sse12_fp_packed, TB, OpSize; + itins.d>, PD; } } @@ -4354,7 +4351,7 @@ let Predicates = [UseSSE2] in { } } // ExeDomain = SSEPackedInt -defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize; +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD; defm PSHUFHW : sse2_pshuffle<"pshufhw", 
v8i16, v16i16, X86PShufhw>, XS; defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; @@ -4507,7 +4504,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX, + imm:$src2))]>, PD, VEX, Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4518,10 +4515,10 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, // Insert let Predicates = [HasAVX] in -defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; +defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in -defm PINSRW : sse2_pinsrw, TB, OpSize; +defm PINSRW : sse2_pinsrw, PD; } // ExeDomain = SSEPackedInt @@ -5160,24 +5157,24 @@ multiclass sse3_addsub, TB, XD, VEX_4V; + f128mem, SSE_ALU_F32P, 0>, XD, VEX_4V; defm VADDSUBPSY : sse3_addsub, TB, XD, VEX_4V, VEX_L; + f256mem, SSE_ALU_F32P, 0>, XD, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub, TB, OpSize, VEX_4V; + f128mem, SSE_ALU_F64P, 0>, PD, VEX_4V; defm VADDSUBPDY : sse3_addsub, TB, OpSize, VEX_4V, VEX_L; + f256mem, SSE_ALU_F64P, 0>, PD, VEX_4V, VEX_L; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub, TB, XD; + f128mem, SSE_ALU_F32P>, XD; let ExeDomain = SSEPackedDouble in defm ADDSUBPD : sse3_addsub, TB, OpSize; + f128mem, SSE_ALU_F64P>, PD; } //===---------------------------------------------------------------------===// @@ -5264,7 +5261,7 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize, Sched<[WriteVecALU]>; + Sched<[WriteVecALU]>; def rm128 : SS38I opc, string OpcodeStr, [(set VR128:$dst, (IntId128 
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize, Sched<[WriteVecALULd]>; + Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -5282,14 +5279,14 @@ multiclass SS3I_unop_rm_int_y opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize, Sched<[WriteVecALU]>; + Sched<[WriteVecALU]>; def rm256 : SS38I, OpSize, + (bitconvert (memopv4i64 addr:$src))))]>, Sched<[WriteVecALULd]>; } @@ -5409,7 +5406,7 @@ multiclass SS3I_binop_rm opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize, Sched<[itins.Sched]>; + Sched<[itins.Sched]>; def rm : SS38I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -5432,7 +5429,7 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize, Sched<[itins.Sched]>; + Sched<[itins.Sched]>; def rm128 : SS38I opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + (bitconvert (memopv2i64 addr:$src2))))]>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -5450,14 +5447,12 @@ multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, def rr256 : SS38I, - OpSize; + [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>; def rm256 : SS38I, OpSize; + 
(IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>; } let ImmT = NoImm, Predicates = [HasAVX] in { @@ -5583,7 +5578,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>; + [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5591,7 +5586,7 @@ multiclass ssse3_palignr { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + [], IIC_SSE_PALIGNRM>, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5601,13 +5596,13 @@ multiclass ssse3_palignr_y { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; + []>, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5687,25 +5682,24 @@ multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>; def rm : SS48I, OpSize; + itins.rm>; } multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; def Yrm : SS48I, - OpSize; + [(set VR256:$dst, (IntId (load addr:$src)))]>; } let Predicates = [HasAVX] in { @@ -5867,27 +5861,25 @@ multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId, OpndItins 
itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))], itins.rr>; def rm : SS48I, - OpSize; + itins.rm>; } multiclass SS41I_binop_rm_int8_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; def Yrm : SS48I, - OpSize; + (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>; } let Predicates = [HasAVX] in { @@ -5951,28 +5943,26 @@ multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I, OpSize; + [(set VR128:$dst, (IntId VR128:$src))]>; // Expecting a i16 load any extended to i32 value. def rm : SS48I, - OpSize; + (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>; } multiclass SS41I_binop_rm_int4_y opc, string OpcodeStr, Intrinsic IntId> { def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId VR128:$src))]>; // Expecting a i16 load any extended to i32 value. def Yrm : SS48I, - OpSize; + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>; } let Predicates = [HasAVX] in { @@ -6247,14 +6237,13 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), - imm:$src2))]>, - OpSize; + imm:$src2))]>; let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8, OpSize; + []>; // FIXME: // There's an AssertZext in the way of writing the store pattern // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) @@ -6273,14 +6262,14 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; + []>; let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8, OpSize; + []>; // FIXME: // There's an AssertZext in the way of writing the store pattern // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) @@ -6299,13 +6288,13 @@ 
multiclass SS41I_extract32 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, - (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize; + (extractelt (v4i32 VR128:$src1), imm:$src2))]>; def mr : SS4AIi8, OpSize; + addr:$dst)]>; } let Predicates = [HasAVX] in @@ -6320,13 +6309,13 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR64:$dst, - (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; + (extractelt (v2i64 VR128:$src1), imm:$src2))]>, REX_W; def mr : SS4AIi8, OpSize, REX_W; + addr:$dst)]>, REX_W; } let Predicates = [HasAVX] in @@ -6344,14 +6333,13 @@ multiclass SS41I_extractf32 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))], - itins.rr>, - OpSize; + itins.rr>; def mr : SS4AIi8, OpSize; + addr:$dst)], itins.rm>; } let ExeDomain = SSEPackedSingle in { @@ -6384,7 +6372,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize; + (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, OpSize; + imm:$src3))]>; } let Predicates = [HasAVX] in @@ -6409,8 +6397,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, - OpSize; + (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 
[(set VR128:$dst, (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, OpSize; + imm:$src3)))]>; } let Predicates = [HasAVX] in @@ -6435,8 +6422,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, - OpSize; + (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, OpSize; + imm:$src3)))]>; } let Predicates = [HasAVX] in @@ -6466,8 +6452,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1, !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, - OpSize; + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1, [(set VR128:$dst, (X86insrtps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))], itins.rm>, OpSize; + imm:$src3))], itins.rm>; } let ExeDomain = SSEPackedSingle in { @@ -6503,8 +6488,7 @@ let ExeDomain = SSEPackedSingle in { !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))], - IIC_SSE_ROUNDPS_REG>, - OpSize; + IIC_SSE_ROUNDPS_REG>; // Vector intrinsic operation, mem def PSm : SS4AIi8, - OpSize; + IIC_SSE_ROUNDPS_MEM>; } // ExeDomain = SSEPackedSingle let ExeDomain = SSEPackedDouble in { @@ -6524,8 +6507,7 @@ let ExeDomain = SSEPackedDouble in { !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))], - IIC_SSE_ROUNDPS_REG>, - OpSize; + IIC_SSE_ROUNDPS_REG>; // Vector intrinsic operation, mem def PDm : SS4AIi8, - OpSize; + IIC_SSE_ROUNDPS_REG>; } 
// ExeDomain = SSEPackedDouble } @@ -6553,7 +6534,7 @@ let ExeDomain = GenericDomain in { "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + []>; // Intrinsic operation, reg. let isCodeGenOnly = 1 in @@ -6564,8 +6545,7 @@ let ExeDomain = GenericDomain in { "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>; // Intrinsic operation, mem. def SSm : SS4AIi8, - OpSize; + (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>; // Operation, reg. let hasSideEffects = 0 in @@ -6588,7 +6567,7 @@ let ExeDomain = GenericDomain in { "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + []>; // Intrinsic operation, reg. let isCodeGenOnly = 1 in @@ -6599,8 +6578,7 @@ let ExeDomain = GenericDomain in { "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>; // Intrinsic operation, mem. 
def SDm : SS4AIi8, - OpSize; + (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>; } // ExeDomain = GenericDomain } @@ -6759,31 +6736,29 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - OpSize, VEX; + VEX; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, - OpSize, VEX; + VEX; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; } let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, - OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>; } // The bit test instructions below are AVX only @@ -6791,11 +6766,10 @@ multiclass avx_bittest opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { def rr : SS48I, OpSize, VEX; + [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, VEX; def rm : SS48I, - OpSize, VEX; + [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, VEX; } let Defs = [EFLAGS], Predicates = 
[HasAVX] in { @@ -6855,13 +6829,12 @@ multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, def rr128 : SS48I, OpSize; + [(set VR128:$dst, (IntId128 VR128:$src))]>; def rm128 : SS48I, OpSize; + (IntId128 (bitconvert (memopv2i64 addr:$src))))]>; } let Predicates = [HasAVX] in @@ -6881,16 +6854,15 @@ multiclass SS41I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], - itins.rr>, OpSize; + itins.rr>; def rm : SS48I, OpSize; + (IntId128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))], + itins.rm>; } /// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator @@ -6900,13 +6872,12 @@ multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, def Yrr : SS48I, OpSize; + [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>; def Yrm : SS48I, OpSize; + (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>; } @@ -6921,15 +6892,14 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; def rm : SS48I, OpSize; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>; } let Predicates = [HasAVX] in { @@ -7036,8 +7006,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, - OpSize; + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>; def rmi : SS4AIi8 opc, string OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, (IntId RC:$src1, - (bitconvert (memop_frag addr:$src2)), imm:$src3))], 
itins.rm>, - OpSize; + (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>; } let Predicates = [HasAVX] in { @@ -7128,7 +7096,7 @@ multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM; def rm : Ii8 opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { @@ -7228,7 +7196,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], - itins.rr>, OpSize; + itins.rr>; def rm0 : SS48I, OpSize; + itins.rm>; } } @@ -7297,16 +7265,15 @@ let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, - OpSize, VEX; + VEX; let Predicates = [HasAVX2] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, - OpSize, VEX, VEX_L; + VEX, VEX_L; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, - OpSize; + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; //===----------------------------------------------------------------------===// // SSE4.2 - Compare Instructions @@ -7321,15 +7288,14 @@ multiclass SS42I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, - OpSize; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; def rm : SS428I, OpSize; + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>; } let Predicates = [HasAVX] in @@ -7369,12 +7335,12 @@ multiclass pcmpistrm_SS42AI { def rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm :SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { @@ -7404,12 +7370,12 @@ multiclass SS42AI_pcmpestrm { def rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { @@ -7439,12 +7405,12 @@ multiclass SS42AI_pcmpistri { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; + []>; } let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { @@ -7475,12 +7441,12 @@ multiclass SS42AI_pcmpestri { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; let mayLoad = 1 in def 
rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; + []>; } let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { @@ -7603,15 +7569,14 @@ multiclass AESI_binop_rm_int opc, string OpcodeStr, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>; def rm : AES8I, OpSize; + (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>; } // Perform One Round of an AES Encryption/Decryption Flow @@ -7644,24 +7609,22 @@ let Predicates = [HasAVX, HasAES] in { "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, - OpSize, VEX; + VEX; def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, - OpSize, VEX; + VEX; } def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, - (int_x86_aesni_aesimc VR128:$src1))]>, - OpSize; + (int_x86_aesni_aesimc VR128:$src1))]>; def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", - [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, - OpSize; + [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>; // AES Round Key Generation Assist let Predicates = [HasAVX, HasAES] in { @@ -7670,26 +7633,24 @@ let Predicates = [HasAVX, HasAES] in { "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize, VEX; + VEX; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, 
i8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, - OpSize, VEX; + VEX; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize; + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, - OpSize; + (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>; //===----------------------------------------------------------------------===// // PCLMUL Instructions @@ -7757,12 +7718,12 @@ def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst), (ins VR128:$src, i8imm:$len, i8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, - imm:$idx))]>, TB, OpSize; + imm:$idx))]>, PD; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "extrq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, - VR128:$mask))]>, TB, OpSize; + VR128:$mask))]>, PD; def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), @@ -8149,10 +8110,10 @@ multiclass f16c_ph2ps { def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", [(set RC:$dst, (Int VR128:$src))]>, - T8, OpSize, VEX; + T8PD, VEX; let neverHasSideEffects = 1, mayLoad = 1 in def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX; } multiclass 
f16c_ps2ph { @@ -8160,12 +8121,12 @@ multiclass f16c_ps2ph { (ins RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, - TA, OpSize, VEX; + TAPD, VEX; let neverHasSideEffects = 1, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TA, OpSize, VEX; + TAPD, VEX; } let Predicates = [HasF16C] in { diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index 3be1129eeca1..ebabd493228f 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -908,8 +908,8 @@ let Predicates = [HasBMI2] in { defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W; defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD; defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; - defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; - defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, VEX_W; // Prefer RORX which is non-destructive and doesn't update EFLAGS. 
let AddedComplexity = 10 in { diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index a559909a723f..8c557e72337d 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -542,10 +542,10 @@ let Predicates = [HasFSGSBase, In64BitMode] in { //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td index 5bf46d146512..25be8a5a9e21 100644 --- a/llvm/lib/Target/X86/X86InstrVMX.td +++ b/llvm/lib/Target/X86/X86InstrVMX.td @@ -17,22 +17,22 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, 
$src2}", []>, T8PD, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmclear\t$vmcs", []>, OpSize, TB; + "vmclear\t$vmcs", []>, PD; // OF 01 D4 def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB; // 0F 01 C2 diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index ab97a5f5433c..b88cc529ab87 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -80,7 +80,7 @@ namespace X86Local { XD = 11, XS = 12, T8 = 13, P_TA = 14, A6 = 15, A7 = 16, T8XD = 17, T8XS = 18, TAXD = 19, - XOP8 = 20, XOP9 = 21, XOPA = 22 + XOP8 = 20, XOP9 = 21, XOPA = 22, PD = 23, T8PD = 24, TAPD = 25, }; } @@ -254,7 +254,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, Operands = &insn.Operands.OperandList; - IsSSE = (HasOpSizePrefix && (Name.find("16") == Name.npos)) || + IsSSE = ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + (Name.find("16") == Name.npos)) || (Name.find("CRC32") != Name.npos); HasVEX_LPrefix = Rec->getValueAsBit("hasVEX_L"); @@ -309,7 +311,7 @@ InstructionContext RecognizableInstr::insnContext() const { } // VEX_L & VEX_W if (HasVEX_LPrefix && HasVEX_WPrefix) { - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L_W_XS); @@ -320,7 +322,8 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_L_W); } else if (HasVEX_LPrefix) { // VEX_L - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = 
EVEX_KB(IC_EVEX_L_XS); @@ -332,7 +335,8 @@ InstructionContext RecognizableInstr::insnContext() const { } else if (HasEVEX_L2Prefix && HasVEX_WPrefix) { // EVEX_L2 & VEX_W - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L2_W_XS); @@ -343,10 +347,11 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_L2_W); } else if (HasEVEX_L2Prefix) { // EVEX_L2 - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_L2_OPSIZE); else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD || - Prefix == X86Local::TAXD) + Prefix == X86Local::TAXD) insnContext = EVEX_KB(IC_EVEX_L2_XD); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_L2_XS); @@ -355,7 +360,8 @@ InstructionContext RecognizableInstr::insnContext() const { } else if (HasVEX_WPrefix) { // VEX_W - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_W_OPSIZE); else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = EVEX_KB(IC_EVEX_W_XS); @@ -366,7 +372,8 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX_W); } // No L, no W - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = EVEX_KB(IC_EVEX_OPSIZE); else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD || Prefix == X86Local::TAXD) @@ -378,7 +385,8 @@ InstructionContext RecognizableInstr::insnContext() const { /// eof EVEX } else if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) { if (HasVEX_LPrefix 
&& HasVEX_WPrefix) { - if (HasOpSizePrefix) + if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_VEX_L_W_OPSIZE; else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS) insnContext = IC_VEX_L_W_XS; @@ -387,11 +395,16 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = IC_VEX_L_W_XD; else insnContext = IC_VEX_L_W; - } else if (HasOpSizePrefix && HasVEX_LPrefix) + } else if ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + HasVEX_LPrefix) insnContext = IC_VEX_L_OPSIZE; - else if (HasOpSizePrefix && HasVEX_WPrefix) + else if ((HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) && + HasVEX_WPrefix) insnContext = IC_VEX_W_OPSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_VEX_OPSIZE; else if (HasVEX_LPrefix && (Prefix == X86Local::XS || Prefix == X86Local::T8XS)) @@ -419,7 +432,8 @@ InstructionContext RecognizableInstr::insnContext() const { else insnContext = IC_VEX; } else if (Is64Bit || HasREX_WPrefix) { - if (HasREX_WPrefix && HasOpSizePrefix) + if (HasREX_WPrefix && (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD)) insnContext = IC_64BIT_REXW_OPSIZE; else if (HasOpSizePrefix && (Prefix == X86Local::XD || Prefix == X86Local::T8XD || @@ -428,7 +442,8 @@ InstructionContext RecognizableInstr::insnContext() const { else if (HasOpSizePrefix && (Prefix == X86Local::XS || Prefix == X86Local::T8XS)) insnContext = IC_64BIT_XS_OPSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_64BIT_OPSIZE; else if (HasAdSizePrefix) insnContext = IC_64BIT_ADSIZE; @@ -458,7 +473,8 @@ InstructionContext 
RecognizableInstr::insnContext() const { insnContext = IC_XS_OPSIZE; else if (HasOpSizePrefix && HasAdSizePrefix) insnContext = IC_OPSIZE_ADSIZE; - else if (HasOpSizePrefix) + else if (HasOpSizePrefix || Prefix == X86Local::PD || + Prefix == X86Local::T8PD || Prefix == X86Local::TAPD) insnContext = IC_OPSIZE; else if (HasAdSizePrefix) insnContext = IC_ADSIZE; @@ -851,7 +867,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { switch (Prefix) { default: llvm_unreachable("Invalid prefix!"); - // Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f + // Extended two-byte opcodes can start with 66 0f, f2 0f, f3 0f, or 0f + case X86Local::PD: case X86Local::XD: case X86Local::XS: case X86Local::TB: @@ -897,6 +914,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet = Opcode; break; case X86Local::T8: + case X86Local::T8PD: case X86Local::T8XD: case X86Local::T8XS: opcodeType = THREEBYTE_38; @@ -940,6 +958,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet = Opcode; break; case X86Local::P_TA: + case X86Local::TAPD: case X86Local::TAXD: opcodeType = THREEBYTE_3A; if (needsModRMForDecode(Form))