diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 852078a299b9..7cd17f109935 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -76,6 +77,14 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
         " of the loop header PC will be 0)."),
     cl::Hidden);
 
+static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( // log2 of bytes
+    "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
+    cl::desc(
+        "Sets the preferable loop alignment for experiments (as log2 bytes) "
+        "for innermost loops only. If specified, this option overrides "
+        "alignment set by x86-experimental-pref-loop-alignment."),
+    cl::Hidden); // getPrefLoopAlignment reads it only if explicitly passed.
+
 static cl::opt<bool> MulConstantOptimization(
     "mul-constant-optimization", cl::init(true),
     cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -51696,3 +51705,10 @@ X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
         .getAsInteger(0, StackProbeSize);
   return StackProbeSize;
 }
+
+Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+  if (ML && ML->isInnermost() && // Null ML: use the generic preference.
+      ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
+    return Align(1ULL << ExperimentalPrefInnermostLoopAlignment); // log2 bytes
+  return TargetLowering::getPrefLoopAlignment(); // No explicit override.
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8b71c8394c01..76c83b7df9eb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1408,6 +1408,8 @@ namespace llvm {
                                    SDValue Addr, SelectionDAG &DAG)
                                    const override;
 
+    Align getPrefLoopAlignment(MachineLoop *ML) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/llvm/test/CodeGen/X86/innermost-loop-alignment.ll b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
new file mode 100644
index 000000000000..fef30fd28716
--- /dev/null
+++ b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s -check-prefix=ALIGN32
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-loop-alignment=5 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64
+
+declare void @foo()
+
+define void @test(i32 %n, i32 %m) { ; 2-deep loop nest: outer exits at %n, inner at %m
+; DEFAULT-LABEL: test:
+; DEFAULT:         .p2align 4, 0x90
+; DEFAULT-NEXT:  .LBB0_1: # %outer
+; DEFAULT-NEXT:    # =>This Loop Header: Depth=1
+; DEFAULT-NEXT:    # Child Loop BB0_2 Depth 2
+; DEFAULT:         .p2align 4, 0x90
+; DEFAULT-NEXT:  .LBB0_2: # %inner
+; DEFAULT-NEXT:    # Parent Loop BB0_1 Depth=1
+
+; ALIGN32-LABEL: test:
+; ALIGN32:         .p2align 4, 0x90
+; ALIGN32-NEXT:  .LBB0_1: # %outer
+; ALIGN32-NEXT:    # =>This Loop Header: Depth=1
+; ALIGN32-NEXT:    # Child Loop BB0_2 Depth 2
+; ALIGN32:         .p2align 5, 0x90
+; ALIGN32-NEXT:  .LBB0_2: # %inner
+; ALIGN32-NEXT:    # Parent Loop BB0_1 Depth=1
+; ALIGN32-NEXT:    # => This Inner Loop Header: Depth=2
+
+; ALIGN64-LABEL: test:
+; ALIGN64:         .p2align 5, 0x90
+; ALIGN64-NEXT:  .LBB0_1: # %outer
+; ALIGN64-NEXT:    # =>This Loop Header: Depth=1
+; ALIGN64-NEXT:    # Child Loop BB0_2 Depth 2
+; ALIGN64:         .p2align 6, 0x90
+; ALIGN64-NEXT:  .LBB0_2: # %inner
+; ALIGN64-NEXT:    # Parent Loop BB0_1 Depth=1
+; ALIGN64-NEXT:    # => This Inner Loop Header: Depth=2
+
+entry:
+  br label %outer
+
+outer: ; header of the depth-1 (outer) loop
+  %outer.iv = phi i32 [0, %entry], [%outer.iv.next, %outer_bb]
+  br label %inner
+
+inner: ; single-block innermost loop (depth 2)
+  %inner.iv = phi i32 [0, %outer], [%inner.iv.next, %inner]
+  call void @foo()
+  %inner.iv.next = add i32 %inner.iv, 1
+  %inner.cond = icmp ne i32 %inner.iv.next, %m
+  br i1 %inner.cond, label %inner, label %outer_bb ; inner back-edge
+
+outer_bb: ; latch of the outer loop
+  %outer.iv.next = add i32 %outer.iv, 1
+  %outer.cond = icmp ne i32 %outer.iv.next, %n
+  br i1 %outer.cond, label %outer, label %exit ; outer back-edge
+
+exit:
+  ret void
+}