[SKX] Enabling SKX target and AVX512BW, AVX512DQ, AVX512VL features.
Enabling HasAVX512{DQ,BW,VL} predicates. Adding VK2, VK4, VK32, VK64 masked register classes. Adding new types (v64i8, v32i16) to VR512. Extending calling conventions for new types (v64i8, v32i16).

Patch by Zinovy Nis <zinovy.y.nis@intel.com>
Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com>

llvm-svn: 213545
commit bfa0131365
parent 32411403b2
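The three features named in the message map directly to the avx512dq, avx512bw and avx512vl feature strings defined further down in X86.td. As a quick orientation (not part of the patch), a runtime check for the same trio might look like the sketch below, assuming a compiler whose __builtin_cpu_supports recognizes the AVX-512 feature names:

// Hypothetical runtime check, not part of this commit; assumes the compiler's
// __builtin_cpu_supports knows the AVX-512 feature strings.
bool hasSkxVectorFeatures() {
  return __builtin_cpu_supports("avx512bw") &&
         __builtin_cpu_supports("avx512dq") &&
         __builtin_cpu_supports("avx512vl");
}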
@@ -1666,6 +1666,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
// Recognize only reasonable suffixes.
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
+.Case("to2", "{1to2}")
+.Case("to4", "{1to4}")
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
.Default(nullptr);
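For context, the two new cases cover the 128-bit and 256-bit embedded-broadcast forms that AVX512VL makes legal, e.g. "vaddpd xmm1, xmm2, qword ptr [rax]{1to2}". A standalone sketch of the same mapping using llvm::StringSwitch outside the parser (the helper name is illustrative):

// Self-contained sketch of the suffix-to-broadcast-decorator mapping above.
#include "llvm/ADT/StringSwitch.h"

static const char *broadcastFromSuffix(llvm::StringRef Suffix) {
  return llvm::StringSwitch<const char *>(Suffix)
      .Case("to2", "{1to2}")   // new with AVX512VL (128-bit ops)
      .Case("to4", "{1to4}")   // new with AVX512VL (256-bit ops)
      .Case("to8", "{1to8}")
      .Case("to16", "{1to16}")
      .Default(nullptr);       // anything else is rejected by the parser
}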
@@ -265,7 +265,7 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")

#define ENUM_ENTRY(n, r, d) n,
enum InstructionContext {
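The ENUM_ENTRY lines above are part of an X-macro table that is expanded once to build the InstructionContext enum and again to build the matching description strings. A minimal, self-contained illustration of that pattern (demo names only, not the real header):

// X-macro demo: one table, two expansions. Names are made up for illustration.
#define DEMO_CONTEXTS \
  ENUM_ENTRY(IC_DEMO_EVEX_KZ, 3, "requires EVEX_KZ") \
  ENUM_ENTRY(IC_DEMO_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W")

#define ENUM_ENTRY(n, r, d) n,
enum DemoInstructionContext { DEMO_CONTEXTS DEMO_IC_MAX };
#undef ENUM_ENTRY

#define ENUM_ENTRY(n, r, d) d,
static const char *DemoContextDescriptions[] = { DEMO_CONTEXTS };
#undef ENUM_ENTRY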
@@ -453,8 +453,12 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM512, "64-byte") \
ENUM_ENTRY(TYPE_VK1, "1-bit") \
+ENUM_ENTRY(TYPE_VK2, "2-bit") \
+ENUM_ENTRY(TYPE_VK4, "4-bit") \
ENUM_ENTRY(TYPE_VK8, "8-bit") \
ENUM_ENTRY(TYPE_VK16, "16-bit") \
+ENUM_ENTRY(TYPE_VK32, "32-bit") \
+ENUM_ENTRY(TYPE_VK64, "64-bit") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
@@ -104,7 +104,15 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;

+def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
+"Enable AVX-512 Doubleword and Quadword Instructions",
+[FeatureAVX512]>;
+def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
+"Enable AVX-512 Byte and Word Instructions",
+[FeatureAVX512]>;
+def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
+"Enable AVX-512 Vector Length eXtensions",
+[FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
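Each SubtargetFeature ties a -mattr string (first argument) to an X86Subtarget member (second argument) that gets set to the given value. A rough hand-written sketch of what the TableGen-generated handling amounts to when the feature string is applied (field names mirror the patch, the parsing itself is simplified and hypothetical):

// Simplified illustration only; the real code is generated by TableGen.
#include <string>

struct X86SubtargetSketch {
  bool HasDQI = false, HasBWI = false, HasVLX = false;

  void applyFeature(const std::string &F) {
    if (F == "+avx512dq") HasDQI = true;  // SubtargetFeature<"avx512dq", "HasDQI", "true", ...>
    if (F == "+avx512bw") HasBWI = true;  // SubtargetFeature<"avx512bw", "HasBWI", "true", ...>
    if (F == "+avx512vl") HasVLX = true;  // SubtargetFeature<"avx512vl", "HasVLX", "true", ...>
  }
};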
@@ -276,6 +284,17 @@ def : ProcessorModel<"knl", HaswellModel,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec]>;

+// SKX
+// FIXME: define SKX model
+def : ProcessorModel<"skx", HaswellModel,
+[FeatureAVX512, FeatureCDI,
+FeatureDQI, FeatureBWI, FeatureVLX,
+FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
+FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
+FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
+FeatureSlowIncDec]>;

def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
def : Proc<"k6-3", [Feature3DNow]>;
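With this definition, "skx" becomes a valid CPU name for the X86 backend (for example via llc -mcpu=skx). A hedged sketch of the same request through the C++ API of that era; signatures changed in later LLVM releases, so treat this as illustrative only:

// Illustrative only: building a TargetMachine with the new CPU name.
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

llvm::TargetMachine *createSkxTargetMachine() {
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargets();
  llvm::InitializeAllTargetMCs();

  std::string Error;
  const std::string TripleName = "x86_64-unknown-linux-gnu";
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleName, Error);
  if (!T)
    return nullptr;

  // "" for extra features; "+avx512bw,+avx512dq,+avx512vl" on a generic CPU
  // would request the same ISA subset.
  return T->createTargetMachine(TripleName, "skx", "", llvm::TargetOptions());
}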
@@ -52,7 +52,7 @@ def RetCC_X86Common : CallingConv<[
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
-CCIfType<[v16i32, v8i64, v16f32, v8f64],
+CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,

// MMX vector types are always returned in MM0. If the target doesn't have
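In practical terms, the widened type list lets a 512-bit vector of byte or word elements come back directly in ZMM0. A small illustration using the Clang/GCC vector extension (assumes the avx512bw feature so that v64i8 is a legal type; names are made up):

// 64 x i8 packed into one 512-bit value; returned in ZMM0 per RetCC_X86Common.
typedef char v64i8_t __attribute__((vector_size(64)));

v64i8_t add_bytes(v64i8_t a, v64i8_t b) {
  return a + b;  // element-wise byte add, result stays in a ZMM register
}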
@@ -252,7 +252,7 @@ def CC_X86_64_C : CallingConv<[
YMM4, YMM5, YMM6, YMM7]>>>>,

// The first 8 512-bit vector arguments are passed in ZMM registers.
-CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
+CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
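Likewise for arguments: up to eight 512-bit byte/word vectors of a non-variadic call can now land in ZMM0 through ZMM7. A sketch with the same vector extension and hypothetical names:

// Eight v32i16 arguments; under CC_X86_64_C they would occupy ZMM0..ZMM7.
typedef short v32i16_t __attribute__((vector_size(64)));

v32i16_t sum8(v32i16_t a, v32i16_t b, v32i16_t c, v32i16_t d,
              v32i16_t e, v32i16_t f, v32i16_t g, v32i16_t h) {
  return a + b + c + d + e + f + g + h;
}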
@@ -1,19 +1,36 @@
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
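These patterns make reinterpreting one 512-bit element type as another free, so a bitcast never costs an instruction. The same idea is visible at the intrinsics level through the AVX-512F cast intrinsics (the helper name below is made up):

// Reinterpret 16 floats as 16 x i32 lanes; lowers to no instruction at all.
#include <immintrin.h>

__m512i as_integer_lanes(__m512 x) {
  return _mm512_castps_si512(x);  // pure bitcast, matches the patterns above
}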
@@ -135,7 +152,6 @@ def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
(ins VR512:$src1, i128mem:$src2, i8imm:$src3),
"vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;

}

let hasSideEffects = 0 in {
@@ -184,6 +184,8 @@ class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
+class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
+class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }

// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
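The two new classes give instruction definitions a way to request 256-bit and 128-bit EVEX encodings, which is what AVX512VL needs. For reference, the EVEX.L'L bit pair selects the operating length as sketched below (value 3 is reserved); EVEX_V512 corresponds to L2=1/L=0, EVEX_V256 to L2=0/L=1, EVEX_V128 to both clear:

// EVEX.L'L vector-length field, shown as (L2 << 1) | L.
enum EvexLL : unsigned {
  EVEX_LL_128 = 0,  // EVEX_V128
  EVEX_LL_256 = 1,  // EVEX_V256
  EVEX_LL_512 = 2   // EVEX_V512; 3 is reserved
};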
@@ -719,10 +719,14 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">,
AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasERI : Predicate<"Subtarget->hasERI()">;
+def HasDQI : Predicate<"Subtarget->hasDQI()">;
+def HasBWI : Predicate<"Subtarget->hasBWI()">;
+def HasVLX : Predicate<"Subtarget->hasVLX()">,
+AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;

def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -449,7 +449,7 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
}

// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
(sequence "ZMM%u", 0, 31)>;

// Scalar AVX-512 floating point registers.
@@ -463,13 +463,19 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
256, (sequence "YMM%u", 0, 31)>;

-// The size of the all masked registers is 16 bit because we have only one
-// KMOVW istruction that can store this register in memory, and it writes 2 bytes
-def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>;
-def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;}
+// Mask registers
+def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
+def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
+def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
+def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
+def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
+def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}

def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
+def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
+def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
-def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
+def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
+def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
+def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
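The new VK32/VK64 classes back the wider mask values produced by byte and word comparisons, one predicate bit per element. A small intrinsics example (requires avx512bw; the helper name is made up):

// Comparing 64 byte lanes yields a 64-bit mask held in a K register (VK64).
#include <immintrin.h>

__mmask64 equal_bytes(__m512i a, __m512i b) {
  return _mm512_cmpeq_epi8_mask(a, b);  // one result bit per byte element
}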
@@ -272,6 +272,9 @@ void X86Subtarget::initializeEnvironment() {
HasERI = false;
HasCDI = false;
HasPFI = false;
+HasDQI = false;
+HasBWI = false;
+HasVLX = false;
HasADX = false;
HasSHA = false;
HasPRFCHW = false;
@@ -189,13 +189,22 @@ protected:

/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;

/// Processor has AVX-512 Exponential and Reciprocal Instructions
bool HasERI;

/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;

+/// Processor has AVX-512 Doubleword and Quadword instructions
+bool HasDQI;

+/// Processor has AVX-512 Byte and Word instructions
+bool HasBWI;

+/// Processor has AVX-512 Vector Length eXtenstions
+bool HasVLX;

/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -349,6 +358,9 @@ public:
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
+bool hasDQI() const { return HasDQI; }
+bool hasBWI() const { return HasBWI; }
+bool hasVLX() const { return HasVLX; }

bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
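Lowering and isel code can now gate v64i8/v32i16 handling on the new accessors. A hypothetical helper, not part of the patch, assuming X86Subtarget.h from this target directory is visible:

// Illustrative gate: 512-bit byte/word operations need both AVX-512F and BWI.
#include "X86Subtarget.h"

static bool shouldUse512BitByteOps(const llvm::X86Subtarget *Subtarget) {
  return Subtarget->hasAVX512() && Subtarget->hasBWI();
}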
@@ -975,10 +975,18 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("VR512", TYPE_XMM512)
TYPE("VK1", TYPE_VK1)
TYPE("VK1WM", TYPE_VK1)
+TYPE("VK2", TYPE_VK2)
+TYPE("VK2WM", TYPE_VK2)
+TYPE("VK4", TYPE_VK4)
+TYPE("VK4WM", TYPE_VK4)
TYPE("VK8", TYPE_VK8)
TYPE("VK8WM", TYPE_VK8)
TYPE("VK16", TYPE_VK16)
TYPE("VK16WM", TYPE_VK16)
+TYPE("VK32", TYPE_VK32)
+TYPE("VK32WM", TYPE_VK32)
+TYPE("VK64", TYPE_VK64)
+TYPE("VK64WM", TYPE_VK64)
TYPE("GR16_NOAX", TYPE_Rv)
TYPE("GR32_NOAX", TYPE_Rv)
TYPE("GR64_NOAX", TYPE_R64)
@@ -1101,6 +1109,8 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("VR256X", ENCODING_VVVV)
ENCODING("VR512", ENCODING_VVVV)
ENCODING("VK1", ENCODING_VVVV)
+ENCODING("VK2", ENCODING_VVVV)
+ENCODING("VK4", ENCODING_VVVV)
ENCODING("VK8", ENCODING_VVVV)
ENCODING("VK16", ENCODING_VVVV)
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
@@ -1111,8 +1121,12 @@ OperandEncoding
RecognizableInstr::writemaskRegisterEncodingFromString(const std::string &s,
uint8_t OpSize) {
ENCODING("VK1WM", ENCODING_WRITEMASK)
+ENCODING("VK2WM", ENCODING_WRITEMASK)
+ENCODING("VK4WM", ENCODING_WRITEMASK)
ENCODING("VK8WM", ENCODING_WRITEMASK)
ENCODING("VK16WM", ENCODING_WRITEMASK)
+ENCODING("VK32WM", ENCODING_WRITEMASK)
+ENCODING("VK64WM", ENCODING_WRITEMASK)
errs() << "Unhandled mask register encoding " << s << "\n";
llvm_unreachable("Unhandled mask register encoding");
}