[compiler-rt][X86] Match the detection of CPUs for __cpu_model to the latest version of gcc

Summary:
We were missing many feature flags that newer gcc supports, and we had our own set of overlapping feature flags that gcc didn't support. Clang's implementation assumes gcc's feature list, so a mismatch here is problematic.

I've also matched the CPU type/subtype lists with gcc and removed all the CPUs that gcc doesn't support. I've also removed the fallback autodetection logic that was taken from Host.cpp. It was the main reason we had extra feature flags relative to gcc. I don't think gcc does this in libgcc.

Once this support is in place we can consider implementing __builtin_cpu_is in clang. This could also be needed for function dispatching that Erich Keane is working on.

Reviewers: echristo, asbirlea, RKSimon, erichkeane, zvi

Reviewed By: asbirlea

Subscribers: dberris, llvm-commits

Differential Revision: https://reviews.llvm.org/D35214

llvm-svn: 307878
This commit is contained in:
Craig Topper 2017-07-13 02:56:24 +00:00
parent 6748abe24d
commit e6b07073ca
1 changed files with 108 additions and 308 deletions

View File

@ -44,29 +44,15 @@ enum ProcessorVendors {
};
enum ProcessorTypes {
INTEL_ATOM = 1,
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_i386,
INTEL_i486,
INTEL_PENTIUM,
INTEL_PENTIUM_PRO,
INTEL_PENTIUM_II,
INTEL_PENTIUM_III,
INTEL_PENTIUM_IV,
INTEL_PENTIUM_M,
INTEL_CORE_DUO,
INTEL_XEONPHI,
INTEL_X86_64,
INTEL_NOCONA,
INTEL_PRESCOTT,
AMD_i486,
AMDPENTIUM,
AMDATHLON,
AMDFAM14H,
AMDFAM16H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
CPU_TYPE_MAX
};
@ -80,33 +66,14 @@ enum ProcessorSubtypes {
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
INTEL_PENTIUM_MMX,
INTEL_CORE2_65,
INTEL_CORE2_45,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_ATOM_BONNELL,
INTEL_ATOM_SILVERMONT,
INTEL_KNIGHTS_LANDING,
AMDPENTIUM_K6,
AMDPENTIUM_K62,
AMDPENTIUM_K63,
AMDPENTIUM_GEODE,
AMDATHLON_TBIRD,
AMDATHLON_MP,
AMDATHLON_XP,
AMDATHLON_K8SSE3,
AMDATHLON_OPTERON,
AMDATHLON_FX,
AMDATHLON_64,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
CPU_SUBTYPE_MAX
};
@ -122,11 +89,26 @@ enum ProcessorFeatures {
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_AVX512,
FEATURE_AVX512SAVE,
FEATURE_MOVBE,
FEATURE_ADX,
FEATURE_EM64T
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS,
FEATURE_AVX512VPOPCNTDQ
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
@ -298,78 +280,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
if (Brand_id != 0)
return;
switch (Family) {
case 3:
*Type = INTEL_i386;
break;
case 4:
switch (Model) {
case 0: // Intel486 DX processors
case 1: // Intel486 DX processors
case 2: // Intel486 SX processors
case 3: // Intel487 processors, IntelDX2 OverDrive processors,
// IntelDX2 processors
case 4: // Intel486 SL processor
case 5: // IntelSX2 processors
case 7: // Write-Back Enhanced IntelDX2 processors
case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
default:
*Type = INTEL_i486;
break;
}
break;
case 5:
switch (Model) {
case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
// Pentium processors (60, 66)
case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
// 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
// 150, 166, 200)
case 3: // Pentium OverDrive processors for Intel486 processor-based
// systems
*Type = INTEL_PENTIUM;
break;
case 4: // Pentium OverDrive processor with MMX technology for Pentium
// processor (75, 90, 100, 120, 133), Pentium processor with
// MMX technology (166, 200)
*Type = INTEL_PENTIUM;
*Subtype = INTEL_PENTIUM_MMX;
break;
default:
*Type = INTEL_PENTIUM;
break;
}
break;
case 6:
switch (Model) {
case 0x01: // Pentium Pro processor
*Type = INTEL_PENTIUM_PRO;
break;
case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
// model 03
case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
// model 05, and Intel Celeron processor, model 05
case 0x06: // Celeron processor, model 06
*Type = INTEL_PENTIUM_II;
break;
case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
// processor, model 07
case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
// model 08, and Celeron processor, model 08
case 0x0a: // Pentium III Xeon processor, model 0Ah
case 0x0b: // Pentium III processor, model 0Bh
*Type = INTEL_PENTIUM_III;
break;
case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
// 0Dh. All processors are manufactured using the 90 nm process.
case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
// Integrated Processor with Intel QuickAssist Technology
*Type = INTEL_PENTIUM_M;
break;
case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
// 0Eh. All processors are manufactured using the 65 nm process.
*Type = INTEL_CORE_DUO;
break; // yonah
case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
// processor, Intel Core 2 Quad processor, Intel Core 2 Quad
// mobile processor, Intel Core 2 Extreme processor, Intel
@ -377,9 +289,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
*Type = INTEL_CORE2; // "core2"
*Subtype = INTEL_CORE2_65;
break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
@ -387,7 +296,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
*Type = INTEL_CORE2; // "penryn"
*Subtype = INTEL_CORE2_45;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
@ -455,8 +363,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_BONNELL;
*Type = INTEL_BONNELL;
break; // "bonnell"
// Atom Silvermont codes from the Intel software optimization guide.
@ -466,120 +373,19 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_SILVERMONT;
*Type = INTEL_SILVERMONT;
break; // "silvermont"
case 0x57:
*Type = INTEL_XEONPHI; // knl
*Subtype = INTEL_KNIGHTS_LANDING;
*Type = INTEL_KNL; // knl
break;
default: // Unknown family 6 CPU, try to guess.
if (Features & (1 << FEATURE_AVX512)) {
*Type = INTEL_XEONPHI; // knl
*Subtype = INTEL_KNIGHTS_LANDING;
break;
}
if (Features & (1 << FEATURE_ADX)) {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_BROADWELL;
break;
}
if (Features & (1 << FEATURE_AVX2)) {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_HASWELL;
break;
}
if (Features & (1 << FEATURE_AVX)) {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
}
if (Features & (1 << FEATURE_SSE4_2)) {
if (Features & (1 << FEATURE_MOVBE)) {
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_SILVERMONT;
} else {
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
}
break;
}
if (Features & (1 << FEATURE_SSE4_1)) {
*Type = INTEL_CORE2; // "penryn"
*Subtype = INTEL_CORE2_45;
break;
}
if (Features & (1 << FEATURE_SSSE3)) {
if (Features & (1 << FEATURE_MOVBE)) {
*Type = INTEL_ATOM;
*Subtype = INTEL_ATOM_BONNELL; // "bonnell"
} else {
*Type = INTEL_CORE2; // "core2"
*Subtype = INTEL_CORE2_65;
}
break;
}
if (Features & (1 << FEATURE_EM64T)) {
*Type = INTEL_X86_64;
break; // x86-64
}
if (Features & (1 << FEATURE_SSE2)) {
*Type = INTEL_PENTIUM_M;
break;
}
if (Features & (1 << FEATURE_SSE)) {
*Type = INTEL_PENTIUM_III;
break;
}
if (Features & (1 << FEATURE_MMX)) {
*Type = INTEL_PENTIUM_II;
break;
}
*Type = INTEL_PENTIUM_PRO;
default: // Unknown family 6 CPU.
break;
}
break;
case 15: {
switch (Model) {
case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
// model 00h and manufactured using the 0.18 micron process.
case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
// processor MP, and Intel Celeron processor. All processors are
// model 01h and manufactured using the 0.18 micron process.
case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
// Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
// processor, and Mobile Intel Celeron processor. All processors
// are model 02h and manufactured using the 0.13 micron process.
*Type =
((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
break;
case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
// processor. All processors are model 03h and manufactured using
// the 90 nm process.
case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
// Pentium D processor, Intel Xeon processor, Intel Xeon
// processor MP, Intel Celeron D processor. All processors are
// model 04h and manufactured using the 90 nm process.
case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
// Extreme Edition, Intel Xeon processor, Intel Xeon processor
// MP, Intel Celeron D processor. All processors are model 06h
// and manufactured using the 65 nm process.
*Type =
((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT);
break;
default:
*Type =
((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV);
break;
}
break;
}
default:
break; /*"generic"*/
break; // Unknown.
}
}
@ -590,62 +396,6 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// appears to be no way to generate the wide variety of AMD-specific targets
// from the information returned from CPUID.
switch (Family) {
case 4:
*Type = AMD_i486;
break;
case 5:
*Type = AMDPENTIUM;
switch (Model) {
case 6:
case 7:
*Subtype = AMDPENTIUM_K6;
break; // "k6"
case 8:
*Subtype = AMDPENTIUM_K62;
break; // "k6-2"
case 9:
case 13:
*Subtype = AMDPENTIUM_K63;
break; // "k6-3"
case 10:
*Subtype = AMDPENTIUM_GEODE;
break; // "geode"
}
break;
case 6:
*Type = AMDATHLON;
switch (Model) {
case 4:
*Subtype = AMDATHLON_TBIRD;
break; // "athlon-tbird"
case 6:
case 7:
case 8:
*Subtype = AMDATHLON_MP;
break; // "athlon-mp"
case 10:
*Subtype = AMDATHLON_XP;
break; // "athlon-xp"
}
break;
case 15:
*Type = AMDATHLON;
if (Features & (1 << FEATURE_SSE3)) {
*Subtype = AMDATHLON_K8SSE3;
break; // "k8-sse3"
}
switch (Model) {
case 1:
*Subtype = AMDATHLON_OPTERON;
break; // "opteron"
case 5:
*Subtype = AMDATHLON_FX;
break; // "athlon-fx"; also opteron
default:
*Subtype = AMDATHLON_64;
break; // "athlon64"
}
break;
case 16:
*Type = AMDFAM10H; // "amdfam10"
switch (Model) {
@ -661,14 +411,13 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 20:
*Type = AMDFAM14H;
*Subtype = AMD_BTVER1;
*Type = AMD_BTVER1;
break; // "btver1";
case 21:
*Type = AMDFAM15H;
if (Model >= 0x60 && Model <= 0x7f) {
*Subtype = AMDFAM15H_BDVER4;
break; // "bdver4"; 50h-6Fh: Excavator
break; // "bdver4"; 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
*Subtype = AMDFAM15H_BDVER3;
@ -695,18 +444,36 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
}
static unsigned getAvailableFeatures(unsigned ECX, unsigned EDX,
unsigned MaxLeaf) {
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned *FeaturesOut) {
unsigned Features = 0;
unsigned EAX, EBX;
Features |= (((EDX >> 23) & 1) << FEATURE_MMX);
Features |= (((EDX >> 25) & 1) << FEATURE_SSE);
Features |= (((EDX >> 26) & 1) << FEATURE_SSE2);
Features |= (((ECX >> 0) & 1) << FEATURE_SSE3);
Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3);
Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1);
Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2);
Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE);
if ((EDX >> 15) & 1)
Features |= 1 << FEATURE_CMOV;
if ((EDX >> 23) & 1)
Features |= 1 << FEATURE_MMX;
if ((EDX >> 25) & 1)
Features |= 1 << FEATURE_SSE;
if ((EDX >> 26) & 1)
Features |= 1 << FEATURE_SSE2;
if ((ECX >> 0) & 1)
Features |= 1 << FEATURE_SSE3;
if ((ECX >> 1) & 1)
Features |= 1 << FEATURE_PCLMUL;
if ((ECX >> 9) & 1)
Features |= 1 << FEATURE_SSSE3;
if ((ECX >> 12) & 1)
Features |= 1 << FEATURE_FMA;
if ((ECX >> 19) & 1)
Features |= 1 << FEATURE_SSE4_1;
if ((ECX >> 20) & 1)
Features |= 1 << FEATURE_SSE4_2;
if ((ECX >> 23) & 1)
Features |= 1 << FEATURE_POPCNT;
if ((ECX >> 25) & 1)
Features |= 1 << FEATURE_AES;
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
@ -715,26 +482,59 @@ static unsigned getAvailableFeatures(unsigned ECX, unsigned EDX,
bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
if (HasAVX)
Features |= 1 << FEATURE_AVX;
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
bool HasADX = HasLeaf7 && ((EBX >> 19) & 1);
bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20);
bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1);
Features |= (HasAVX << FEATURE_AVX);
Features |= (HasAVX2 << FEATURE_AVX2);
Features |= (HasAVX512 << FEATURE_AVX512);
Features |= (HasAVX512Save << FEATURE_AVX512SAVE);
Features |= (HasADX << FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 3) & 1))
Features |= 1 << FEATURE_BMI;
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
Features |= 1 << FEATURE_AVX2;
if (HasLeaf7 && ((EBX >> 9) & 1))
Features |= 1 << FEATURE_BMI2;
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512F;
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512DQ;
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512IFMA;
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512PF;
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512ER;
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512CD;
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512BW;
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512VL;
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512VBMI;
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX512VPOPCNTDQ;
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX5124VNNIW;
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
Features |= 1 << FEATURE_AVX5124FMAPS;
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf1)
Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T);
if (HasExtLeaf1 && ((ECX >> 6) & 1))
Features |= 1 << FEATURE_SSE4_A;
if (HasExtLeaf1 && ((ECX >> 11) & 1))
Features |= 1 << FEATURE_XOP;
if (HasExtLeaf1 && ((ECX >> 16) & 1))
Features |= 1 << FEATURE_FMA4;
return Features;
*FeaturesOut = Features;
}
#if defined(HAVE_INIT_PRIORITY)
@ -787,7 +587,7 @@ __cpu_indicator_init(void) {
Brand_id = EBX & 0xff;
/* Find available features. */
Features = getAvailableFeatures(ECX, EDX, MaxLeaf);
getAvailableFeatures(ECX, EDX, MaxLeaf, &Features);
__cpu_model.__cpu_features[0] = Features;
if (Vendor == SIG_INTEL) {