From 1b86ad27a7d38c3751c1cbe65827cc6819dfb4fd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 13 Mar 2020 10:49:38 -0700 Subject: [PATCH] Use 15 byte long nops on modern Intel processors Back in D42616, we switched our default nop length from 15 to 10 bytes because some platforms have painful decode stalls when encountering multiple instruction prefixes. (10 byte long nops come from the fact that prefixes are used to pad after 8 bytes, and some platforms have issues w/more than two prefixes.) Based on Agner's guides, it appears to be the case that modern Intel (SandyBridge and later) can decode an arbitrary number of prefixes without issue. Intel's guide only provides up to 9 bytes; I read that as providing a safe default for all their chips. Older chips and Atom series have serious decode stalls. I can't find a conclusive reference beyond those two. Differential Revision: https://reviews.llvm.org/D75945 --- llvm/lib/Target/X86/X86.td | 3 ++- .../X86/align-branch-boundary-suppressions.ll | 6 +++++- llvm/test/MC/X86/align-via-relaxation.s | 7 +++---- llvm/test/MC/X86/x86_long_nop.s | 12 ++++++------ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 931367ce4de5..6c4ab8f5a6f3 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -551,7 +551,8 @@ def ProcessorFeatures { FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate, - FeatureMergeToThreeWayBranch]; + FeatureMergeToThreeWayBranch, + FeatureFast15ByteNOP]; list SNBSpecificFeatures = [FeatureSlowUAMem32, FeaturePOPCNTFalseDeps]; list SNBInheritableFeatures = diff --git a/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll index 292fa6b18223..7db20187d45d 100644 --- a/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll +++ b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll @@ -69,8 +69,12 @@ define void @patchpoint(i64 %a, i64 %b) { ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: #noautopadding ; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 ; CHECK-NEXT: nopw %cs:512(%rax,%rax) -; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: #autopadding ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 diff --git a/llvm/test/MC/X86/align-via-relaxation.s b/llvm/test/MC/X86/align-via-relaxation.s index 7d91eaacf9a9..f81e4c73e4f2 100644 --- a/llvm/test/MC/X86/align-via-relaxation.s +++ b/llvm/test/MC/X86/align-via-relaxation.s @@ -32,8 +32,7 @@ foo: # that would require a further round of relaxation # CHECK: : # CHECK: 22: eb fe jmp -2 - # CHECK: 24: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) - # CHECK: 2e: 66 90 nop + # CHECK: 24: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) # CHECK: 30: 0f 0b ud2 bar: @@ -48,8 +47,8 @@ nobypass: # CHECK: : # CHECK: 45: 48 85 c0 testq %rax, %rax # CHECK: 48: 0f 8e 22 00 00 00 jle 34 - # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) - # CHECK: 58: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax) + # CHECK: 4e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax) + # CHECK: 5d: 0f 1f 00 nopl (%rax) # CHECK: : # CHECK: 60: 48 83 e8 01 subq $1, %rax # CHECK: 64: 48 85 c0 testq %rax, %rax diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s index 68d46d0d6e16..1676bde58255 100644 --- a/llvm/test/MC/X86/x86_long_nop.s +++ b/llvm/test/MC/X86/x86_long_nop.s @@ -17,12 +17,12 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knl %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s