[SKX] Enabling SKX target and AVX512BW, AVX512DQ, AVX512VL features.

Enabling HasAVX512{DQ,BW,VL} predicates.
Adding VK2, VK4, VK32, VK64 masked register classes.
Adding new types (v64i8, v32i16) to VR512.
Extending calling conventions for new types (v64i8, v32i16)

Patch by Zinovy Nis <zinovy.y.nis@intel.com>
Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com>

llvm-svn: 213545
This commit is contained in:
Robert Khasanov 2014-07-21 14:54:21 +00:00
parent 32411403b2
commit bfa0131365
11 changed files with 105 additions and 23 deletions

View File

@ -1666,6 +1666,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
// Recognize only reasonable suffixes.
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
.Case("to2", "{1to2}")
.Case("to4", "{1to4}")
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
.Default(nullptr);

View File

@ -265,7 +265,7 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
enum InstructionContext {
@ -453,8 +453,12 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM512, "64-byte") \
ENUM_ENTRY(TYPE_VK1, "1-bit") \
ENUM_ENTRY(TYPE_VK2, "2-bit") \
ENUM_ENTRY(TYPE_VK4, "4-bit") \
ENUM_ENTRY(TYPE_VK8, "8-bit") \
ENUM_ENTRY(TYPE_VK16, "16-bit") \
ENUM_ENTRY(TYPE_VK32, "32-bit") \
ENUM_ENTRY(TYPE_VK64, "64-bit") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \

View File

@ -104,7 +104,15 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@ -276,6 +284,17 @@ def : ProcessorModel<"knl", HaswellModel,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec]>;
// SKX
// FIXME: define SKX model
def : ProcessorModel<"skx", HaswellModel,
[FeatureAVX512, FeatureCDI,
FeatureDQI, FeatureBWI, FeatureVLX,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
def : Proc<"k6-3", [Feature3DNow]>;

View File

@ -52,7 +52,7 @@ def RetCC_X86Common : CallingConv<[
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
CCIfType<[v16i32, v8i64, v16f32, v8f64],
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
// MMX vector types are always returned in MM0. If the target doesn't have
@ -252,7 +252,7 @@ def CC_X86_64_C : CallingConv<[
YMM4, YMM5, YMM6, YMM7]>>>>,
// The first 8 512-bit vector arguments are passed in ZMM registers.
CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,

View File

@ -1,19 +1,36 @@
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
@ -135,7 +152,6 @@ def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
(ins VR512:$src1, i128mem:$src2, i8imm:$src3),
"vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
}
let hasSideEffects = 0 in {

View File

@ -184,6 +184,8 @@ class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.

View File

@ -719,10 +719,14 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">,
AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasERI : Predicate<"Subtarget->hasERI()">;
def HasDQI : Predicate<"Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;

View File

@ -449,7 +449,7 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
(sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
@ -463,13 +463,19 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
256, (sequence "YMM%u", 0, 31)>;
// The size of the all masked registers is 16 bit because we have only one
// KMOVW istruction that can store this register in memory, and it writes 2 bytes
def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>;
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;}
// Mask registers
def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}

View File

@ -272,6 +272,9 @@ void X86Subtarget::initializeEnvironment() {
HasERI = false;
HasCDI = false;
HasPFI = false;
HasDQI = false;
HasBWI = false;
HasVLX = false;
HasADX = false;
HasSHA = false;
HasPRFCHW = false;

View File

@ -189,13 +189,22 @@ protected:
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
/// Processor has AVX-512 Exponential and Reciprocal Instructions
bool HasERI;
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;
/// Processor has AVX-512 Doubleword and Quadword instructions
bool HasDQI;
/// Processor has AVX-512 Byte and Word instructions
bool HasBWI;
/// Processor has AVX-512 Vector Length eXtenstions
bool HasVLX;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@ -349,6 +358,9 @@ public:
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }

View File

@ -975,10 +975,18 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("VR512", TYPE_XMM512)
TYPE("VK1", TYPE_VK1)
TYPE("VK1WM", TYPE_VK1)
TYPE("VK2", TYPE_VK2)
TYPE("VK2WM", TYPE_VK2)
TYPE("VK4", TYPE_VK4)
TYPE("VK4WM", TYPE_VK4)
TYPE("VK8", TYPE_VK8)
TYPE("VK8WM", TYPE_VK8)
TYPE("VK16", TYPE_VK16)
TYPE("VK16WM", TYPE_VK16)
TYPE("VK32", TYPE_VK32)
TYPE("VK32WM", TYPE_VK32)
TYPE("VK64", TYPE_VK64)
TYPE("VK64WM", TYPE_VK64)
TYPE("GR16_NOAX", TYPE_Rv)
TYPE("GR32_NOAX", TYPE_Rv)
TYPE("GR64_NOAX", TYPE_R64)
@ -1101,6 +1109,8 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("VR256X", ENCODING_VVVV)
ENCODING("VR512", ENCODING_VVVV)
ENCODING("VK1", ENCODING_VVVV)
ENCODING("VK2", ENCODING_VVVV)
ENCODING("VK4", ENCODING_VVVV)
ENCODING("VK8", ENCODING_VVVV)
ENCODING("VK16", ENCODING_VVVV)
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
@ -1111,8 +1121,12 @@ OperandEncoding
RecognizableInstr::writemaskRegisterEncodingFromString(const std::string &s,
uint8_t OpSize) {
ENCODING("VK1WM", ENCODING_WRITEMASK)
ENCODING("VK2WM", ENCODING_WRITEMASK)
ENCODING("VK4WM", ENCODING_WRITEMASK)
ENCODING("VK8WM", ENCODING_WRITEMASK)
ENCODING("VK16WM", ENCODING_WRITEMASK)
ENCODING("VK32WM", ENCODING_WRITEMASK)
ENCODING("VK64WM", ENCODING_WRITEMASK)
errs() << "Unhandled mask register encoding " << s << "\n";
llvm_unreachable("Unhandled mask register encoding");
}