diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index a259c9632032..a11a238fc976 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1666,6 +1666,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, // Recognize only reasonable suffixes. const char *BroadcastPrimitive = StringSwitch(getLexer().getTok().getIdentifier()) + .Case("to2", "{1to2}") + .Case("to4", "{1to4}") .Case("to8", "{1to8}") .Case("to16", "{1to16}") .Default(nullptr); diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 4baaf1e68736..13a7b557b440 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -265,7 +265,7 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") #define ENUM_ENTRY(n, r, d) n, enum InstructionContext { @@ -453,8 +453,12 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM512, "64-byte") \ ENUM_ENTRY(TYPE_VK1, "1-bit") \ + ENUM_ENTRY(TYPE_VK2, "2-bit") \ + ENUM_ENTRY(TYPE_VK4, "4-bit") \ ENUM_ENTRY(TYPE_VK8, "8-bit") \ ENUM_ENTRY(TYPE_VK16, "16-bit") \ + ENUM_ENTRY(TYPE_VK32, "32-bit") \ + ENUM_ENTRY(TYPE_VK64, "64-bit") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 93f516a151a9..cd32a0f24234 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -104,7 +104,15 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; - +def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", + "Enable AVX-512 Doubleword and Quadword Instructions", + [FeatureAVX512]>; +def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", + "Enable AVX-512 Byte and Word Instructions", + [FeatureAVX512]>; +def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", + "Enable AVX-512 Vector Length eXtensions", + [FeatureAVX512]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; @@ -276,6 +284,17 @@ def : ProcessorModel<"knl", HaswellModel, FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, FeatureSlowIncDec]>; +// SKX +// FIXME: define SKX model +def : ProcessorModel<"skx", HaswellModel, + [FeatureAVX512, FeatureCDI, + FeatureDQI, FeatureBWI, FeatureVLX, + FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT, + FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, + FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, + FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, + FeatureSlowIncDec]>; + def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; def : Proc<"k6-3", [Feature3DNow]>; diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 0824d4ed660b..86c01bd64647 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -52,7 +52,7 @@ def RetCC_X86Common : CallingConv<[ // 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3 // can only be used by ABI non-compliant code. This vector type is only // supported while using the AVX-512 target feature. - CCIfType<[v16i32, v8i64, v16f32, v8f64], + CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>, // MMX vector types are always returned in MM0. If the target doesn't have @@ -252,7 +252,7 @@ def CC_X86_64_C : CallingConv<[ YMM4, YMM5, YMM6, YMM7]>>>>, // The first 8 512-bit vector arguments are passed in ZMM registers. - CCIfNotVarArg>>>, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 41e900ed11a4..d2894088b80f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1,19 +1,36 @@ // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { - def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; - def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>; def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>; + def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>; def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>; - def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>; def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>; + def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>; def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>; - def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>; + def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>; @@ -135,7 +152,6 @@ def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst), (ins VR512:$src1, i128mem:$src2, i8imm:$src3), "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>; - } let hasSideEffects = 0 in { diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index ab6d2885fb3a..8ef5f901c185 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -184,6 +184,8 @@ class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; } class EVEX_B { bit hasEVEX_B = 1; } class EVEX_RC { bit hasEVEX_RC = 1; } class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } +class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } +class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index e7b532c6af81..0f872a676c25 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -719,10 +719,14 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; -def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; +def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">; def HasPFI : Predicate<"Subtarget->hasPFI()">; def HasERI : Predicate<"Subtarget->hasERI()">; +def HasDQI : Predicate<"Subtarget->hasDQI()">; +def HasBWI : Predicate<"Subtarget->hasBWI()">; +def HasVLX : Predicate<"Subtarget->hasVLX()">, + AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 33c402b69a4a..0da98637496e 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -449,7 +449,7 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { } // AVX-512 vector/mask registers. -def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512, +def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512, (sequence "ZMM%u", 0, 31)>; // Scalar AVX-512 floating point registers. @@ -463,13 +463,19 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 256, (sequence "YMM%u", 0, 31)>; -// The size of the all masked registers is 16 bit because we have only one -// KMOVW istruction that can store this register in memory, and it writes 2 bytes -def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>; -def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;} +// Mask registers +def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} +def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;} +def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;} +def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;} def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} +def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} +def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;} +def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} +def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} -def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>; - +def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;} +def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} +def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index d1b71c48a92a..41551a1d677f 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -272,6 +272,9 @@ void X86Subtarget::initializeEnvironment() { HasERI = false; HasCDI = false; HasPFI = false; + HasDQI = false; + HasBWI = false; + HasVLX = false; HasADX = false; HasSHA = false; HasPRFCHW = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 30c3b08a890e..5f5df5e0818c 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -189,13 +189,22 @@ protected: /// Processor has AVX-512 PreFetch Instructions bool HasPFI; - + /// Processor has AVX-512 Exponential and Reciprocal Instructions bool HasERI; - + /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; - + + /// Processor has AVX-512 Doubleword and Quadword instructions + bool HasDQI; + + /// Processor has AVX-512 Byte and Word instructions + bool HasBWI; + + /// Processor has AVX-512 Vector Length eXtenstions + bool HasVLX; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -349,6 +358,9 @@ public: bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } + bool hasDQI() const { return HasDQI; } + bool hasBWI() const { return HasBWI; } + bool hasVLX() const { return HasVLX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 4137a57b00a4..b7bd822d31d6 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -975,10 +975,18 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("VR512", TYPE_XMM512) TYPE("VK1", TYPE_VK1) TYPE("VK1WM", TYPE_VK1) + TYPE("VK2", TYPE_VK2) + TYPE("VK2WM", TYPE_VK2) + TYPE("VK4", TYPE_VK4) + TYPE("VK4WM", TYPE_VK4) TYPE("VK8", TYPE_VK8) TYPE("VK8WM", TYPE_VK8) TYPE("VK16", TYPE_VK16) TYPE("VK16WM", TYPE_VK16) + TYPE("VK32", TYPE_VK32) + TYPE("VK32WM", TYPE_VK32) + TYPE("VK64", TYPE_VK64) + TYPE("VK64WM", TYPE_VK64) TYPE("GR16_NOAX", TYPE_Rv) TYPE("GR32_NOAX", TYPE_Rv) TYPE("GR64_NOAX", TYPE_R64) @@ -1101,6 +1109,8 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s, ENCODING("VR256X", ENCODING_VVVV) ENCODING("VR512", ENCODING_VVVV) ENCODING("VK1", ENCODING_VVVV) + ENCODING("VK2", ENCODING_VVVV) + ENCODING("VK4", ENCODING_VVVV) ENCODING("VK8", ENCODING_VVVV) ENCODING("VK16", ENCODING_VVVV) errs() << "Unhandled VEX.vvvv register encoding " << s << "\n"; @@ -1111,8 +1121,12 @@ OperandEncoding RecognizableInstr::writemaskRegisterEncodingFromString(const std::string &s, uint8_t OpSize) { ENCODING("VK1WM", ENCODING_WRITEMASK) + ENCODING("VK2WM", ENCODING_WRITEMASK) + ENCODING("VK4WM", ENCODING_WRITEMASK) ENCODING("VK8WM", ENCODING_WRITEMASK) ENCODING("VK16WM", ENCODING_WRITEMASK) + ENCODING("VK32WM", ENCODING_WRITEMASK) + ENCODING("VK64WM", ENCODING_WRITEMASK) errs() << "Unhandled mask register encoding " << s << "\n"; llvm_unreachable("Unhandled mask register encoding"); }