From f5b34e535d63222500e8456309817cc6ef742b6b Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 4 Nov 2014 21:18:09 +0000 Subject: [PATCH] [X86] Add 'FeatureSlowSHLD' to cpu 'bdver3'. Also explicitly set FeatureAVX and FeatureSSE4A for all the bdver* cpus. This patch adds 'FeatureSlowSHLD' to 'bdver3'. According to the official AMD optimization guide for amdfam15: "Using alternative code in place of SHLD achieves lower overall latency and requires fewer execution resources. The 32-bit and 64-bit forms of ADD, ADC, SHR, and LEA (except 16-bit form) are DirectPath instructions, while SHLD is a VectorPath instruction." This patch also explicitly sets feature AVX and SSE4A for all the bdver* cpus. This part of the patch is a non-functional change and it is mainly done for clarity reasons (both XOP and FMA4 already imply AVX and SSE4A). llvm-svn: 221296 --- llvm/lib/Target/X86/X86.td | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 5c88b5df443a..561c20083547 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -355,27 +355,30 @@ def : ProcessorModel<"btver2", BtVer2Model, // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; + FeatureAVX, FeatureSSE4A, FeatureLZCNT, + FeaturePOPCNT, FeatureSlowSHLD]>; // Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureTBM, - FeatureFMA, FeatureSlowSHLD]>; + FeatureAVX, FeatureSSE4A, FeatureF16C, + FeatureLZCNT, FeaturePOPCNT, FeatureBMI, + FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; // Steamroller def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, 
FeatureTBM, - FeatureFMA, FeatureFSGSBase]>; + FeatureAVX, FeatureSSE4A, FeatureF16C, + FeatureLZCNT, FeaturePOPCNT, FeatureBMI, + FeatureTBM, FeatureFMA, FeatureSlowSHLD, + FeatureFSGSBase]>; // Excavator def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureBMI2, - FeatureTBM, FeatureFMA, FeatureFSGSBase]>; + FeatureTBM, FeatureFMA, FeatureSSE4A, + FeatureFSGSBase]>; def : Proc<"geode", [Feature3DNowA]>;