diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 51f2dbda3c96..912ac5a67898 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27619,6 +27619,60 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
   return BB;
 }
 
+/// SetJmp implies a future control-flow change upon calling the corresponding
+/// LongJmp.
+/// Instead of using the 'return' instruction, the long jump fixes the stack and
+/// performs an indirect branch. To do so it uses the register values that were
+/// stored in the jump buffer (when calling SetJmp).
+/// In case the shadow stack is enabled, we need to fix it as well, because some
+/// return addresses will be skipped.
+/// This function saves the SSP for the future fix that is performed by
+/// emitLongJmpShadowStackFix.
+/// \sa emitLongJmpShadowStackFix
+/// \param [in] MI The temporary Machine Instruction for the builtin.
+/// \param [in] MBB The Machine Basic Block that will be modified.
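+/// Note: the SSP is stored in the fourth pointer-sized slot of the jump
+/// buffer; the first three slots hold the frame pointer, the resume address
+/// and the stack pointer saved by emitEHSjLjSetJmp.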
+void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
+                                                 MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstrBuilder MIB;
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+
+  // Initialize a register with zero.
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+  unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
+  BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
+      .addDef(ZReg)
+      .addReg(ZReg, RegState::Undef)
+      .addReg(ZReg, RegState::Undef);
+
+  // Read the current SSP register value into the zeroed register.
+  unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
+  BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+
+  // Write the SSP register value to slot 3 of the input memory buffer.
+  unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
+  const int64_t SSPOffset = 3 * PVT.getStoreSize();
+  const unsigned MemOpndSlot = 1;
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
+    else
+      MIB.add(MI.getOperand(MemOpndSlot + i));
+  }
+  MIB.addReg(SSPCopyReg);
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+}
+
 MachineBasicBlock *
 X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
@@ -27728,6 +27782,11 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   else
     MIB.addMBB(restoreMBB);
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
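+  // If CET shadow-stack protection is enabled for this module (indicated by
+  // the "cf-protection-return" module flag), also save the current SSP in the
+  // jump buffer so that the matching longjmp can fix the shadow stack.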
+  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+    emitSetJmpShadowStackFix(MI, thisMBB);
+  }
+
   // Setup
   MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
           .addMBB(restoreMBB);
@@ -27769,6 +27828,183 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
   return sinkMBB;
 }
 
+/// Fix the shadow stack using the previously saved SSP value.
+/// \sa emitSetJmpShadowStackFix
+/// \param [in] MI The temporary Machine Instruction for the builtin.
+/// \param [in] MBB The Machine Basic Block that will be modified.
+/// \return The sink MBB that will perform the future indirect branch.
+MachineBasicBlock *
+X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
+                                             MachineBasicBlock *MBB) const {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Memory Reference
+  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
+
+  MVT PVT = getPointerTy(MF->getDataLayout());
+  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+
+  // checkSspMBB:
+  //         xor vreg1, vreg1
+  //         rdssp vreg1
+  //         test vreg1, vreg1
+  //         je sinkMBB   # Jump if Shadow Stack is not supported
+  // fallMBB:
+  //         mov buf+24/12(%rip), vreg2
+  //         sub vreg1, vreg2
+  //         jbe sinkMBB  # No need to fix the Shadow Stack
+  // fixShadowMBB:
+  //         shr 3/2, vreg2
+  //         incssp vreg2  # fix the SSP according to the lower 8 bits
+  //         shr 8, vreg2
+  //         je sinkMBB
+  // fixShadowLoopPrepareMBB:
+  //         shl vreg2
+  //         mov 128, vreg3
+  // fixShadowLoopMBB:
+  //         incssp vreg3
+  //         dec vreg2
+  //         jne fixShadowLoopMBB # Iterate until you finish fixing
+  //                              # the Shadow Stack
+  // sinkMBB:
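+  //
+  // Worked example (illustrative numbers, 64-bit): if the saved SSP is
+  // 0x2108 bytes above the current SSP, shifting right by 3 gives 0x421
+  // shadow-stack entries to pop. The first incssp consumes the low 8 bits
+  // (0x21 = 33 entries); the remaining 0x421 >> 8 = 4 units of 256 entries
+  // are doubled by the shift left into 8 loop iterations of incssp by 128,
+  // for a total of 33 + 8 * 128 = 0x421 entries.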
+
+  MachineFunction::iterator I = ++MBB->getIterator();
+  const BasicBlock *BB = MBB->getBasicBlock();
+
+  MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(I, checkSspMBB);
+  MF->insert(I, fallMBB);
+  MF->insert(I, fixShadowMBB);
+  MF->insert(I, fixShadowLoopPrepareMBB);
+  MF->insert(I, fixShadowLoopMBB);
+  MF->insert(I, sinkMBB);
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI),
+                  MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  MBB->addSuccessor(checkSspMBB);
+
+  // Initialize a register with zero.
+  unsigned ZReg = MRI.createVirtualRegister(PtrRC);
+  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
+  BuildMI(checkSspMBB, DL, TII->get(XorRROpc))
+      .addDef(ZReg)
+      .addReg(ZReg, RegState::Undef)
+      .addReg(ZReg, RegState::Undef);
+
+  // Read the current SSP register value into the zeroed register.
+  unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC);
+  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
+  BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+
+  // Check whether the value read from the SSP register is zero. If so, the
+  // shadow stack is not supported, so jump directly to the sink.
+  unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
+  BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
+      .addReg(SSPCopyReg)
+      .addReg(SSPCopyReg);
+  BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  checkSspMBB->addSuccessor(sinkMBB);
+  checkSspMBB->addSuccessor(fallMBB);
+
+  // Reload the previously saved SSP register value.
+  unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC);
+  unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
+  const int64_t SSPOffset = 3 * PVT.getStoreSize();
+  MachineInstrBuilder MIB =
+      BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp)
+      MIB.addDisp(MI.getOperand(i), SSPOffset);
+    else
+      MIB.add(MI.getOperand(i));
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
+  // Subtract the current SSP from the previous SSP.
+  unsigned SspSubReg = MRI.createVirtualRegister(PtrRC);
+  unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
+  BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
+      .addReg(PrevSSPReg)
+      .addReg(SSPCopyReg);
+
+  // Jump to sink in case PrevSSPReg <= SSPCopyReg.
+  BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB);
+  fallMBB->addSuccessor(sinkMBB);
+  fallMBB->addSuccessor(fixShadowMBB);
+
+  // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8.
+  unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
+  unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
+  unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
+      .addReg(SspSubReg)
+      .addImm(Offset);
+
+  // Advance the SSP using only the lower 8 bits of the delta.
+  unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
+  BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
+
+  // Shift out the lower 8 bits, which were already handled by incssp above.
+  unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
+      .addReg(SspFirstShrReg)
+      .addImm(8);
+
+  // Jump if the result of the shift is zero.
+  BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+  fixShadowMBB->addSuccessor(sinkMBB);
+  fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
+
+  // Do a single shift left: the remaining count is in units of 256 entries,
+  // but each loop iteration below advances the SSP by only 128.
+  unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
+  unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
+      .addReg(SspSecondShrReg);
+
+  // Save the value 128 to a register (will be used next with incssp).
+  unsigned Value128InReg = MRI.createVirtualRegister(PtrRC);
+  unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
+  BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
+      .addImm(128);
+  fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
+
+  // Since incssp only looks at the lower 8 bits, we might need to do several
+  // iterations of incssp until we finish fixing the shadow stack.
+  unsigned DecReg = MRI.createVirtualRegister(PtrRC);
+  unsigned CounterReg = MRI.createVirtualRegister(PtrRC);
+  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
+      .addReg(SspAfterShlReg)
+      .addMBB(fixShadowLoopPrepareMBB)
+      .addReg(DecReg)
+      .addMBB(fixShadowLoopMBB);
+
+  // Every iteration we advance the SSP by 128 entries.
+  BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
+
+  // Every iteration we decrement the counter by 1.
+  unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
+  BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
+
+  // Jump if the counter is not zero yet.
+  BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB);
+  fixShadowLoopMBB->addSuccessor(sinkMBB);
+  fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
+
+  return sinkMBB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const {
@@ -27801,13 +28037,21 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
   unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
   unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
 
+  MachineBasicBlock *thisMBB = MBB;
+
+  // When CET and the shadow stack are enabled, we need to fix the Shadow Stack.
+  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
+    thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
+  }
+
   // Reload FP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
     MIB.add(MI.getOperand(i));
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Reload IP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
     if (i == X86::AddrDisp)
       MIB.addDisp(MI.getOperand(i), LabelOffset);
@@ -27815,8 +28059,9 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
       MIB.add(MI.getOperand(i));
   }
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Reload SP
-  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
+  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
   for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
     if (i == X86::AddrDisp)
       MIB.addDisp(MI.getOperand(i), SPOffset);
@@ -27824,11 +28069,12 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
       MIB.add(MI.getOperand(i));
   }
   MIB.setMemRefs(MMOBegin, MMOEnd);
+
   // Jump
-  BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+  BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
 
   MI.eraseFromParent();
-  return MBB;
+  return thisMBB;
 }
 
 void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c81b062358e8..d09be38c3b85 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1331,9 +1331,15 @@ namespace llvm {
     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;
 
+    void emitSetJmpShadowStackFix(MachineInstr &MI,
+                                  MachineBasicBlock *MBB) const;
+
     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const;
 
+    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
+                                                 MachineBasicBlock *MBB) const;
+
     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                      MachineBasicBlock *MBB) const;
 
diff --git a/llvm/test/CodeGen/X86/shadow-stack.ll b/llvm/test/CodeGen/X86/shadow-stack.ll
new file mode 100644
index 000000000000..d0886f49c3eb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shadow-stack.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple x86_64-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86_64
+; RUN: llc -mtriple i386-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86
+
+; The macOS triples are used to get trapping behavior on the "unreachable" IR
+; instruction, so that the placement of the ud2 instruction can be verified.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; The IR was created using the following C code:
+;; typedef void *jmp_buf;
+;; jmp_buf buf;
+;;
+;; __attribute__((noinline)) int bar(int i) {
+;;   int j = i - 111;
+;;   __builtin_longjmp(&buf, 1);
+;;   return j;
+;; }
+;;
+;; int foo(int i) {
+;;   int j = i * 11;
+;;   if (!__builtin_setjmp(&buf)) {
+;;     j += 33 + bar(j);
+;;   }
+;;   return j + i;
+;; }
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+@buf = common local_unnamed_addr global i8* null, align 8
+
+; Functions that use LongJmp should fix the Shadow Stack using the
+; ShadowStackPointer previously saved in the input buffer.
+; The fix requires unwinding the shadow stack to the last SSP.
+define i32 @bar(i32 %i) local_unnamed_addr {
+; X86_64-LABEL: bar:
+; X86_64:       ## %bb.0: ## %entry
+; X86_64-NEXT:    pushq %rbp
+; X86_64-NEXT:    .cfi_def_cfa_offset 16
+; X86_64-NEXT:    .cfi_offset %rbp, -16
+; X86_64-NEXT:    movq _buf@{{.*}}(%rip), %rax
+; X86_64-NEXT:    movq (%rax), %rax
+; X86_64-NEXT:    xorq %rdx, %rdx
+; X86_64-NEXT:    rdsspq %rdx
+; X86_64-NEXT:    testq %rdx, %rdx
+; X86_64-NEXT:    je LBB0_5
+; X86_64-NEXT:  ## %bb.1: ## %entry
+; X86_64-NEXT:    movq 24(%rax), %rcx
+; X86_64-NEXT:    subq %rdx, %rcx
+; X86_64-NEXT:    jbe LBB0_5
+; X86_64-NEXT:  ## %bb.2: ## %entry
+; X86_64-NEXT:    shrq $3, %rcx
+; X86_64-NEXT:    incsspq %rcx
+; X86_64-NEXT:    shrq $8, %rcx
+; X86_64-NEXT:    je LBB0_5
+; X86_64-NEXT:  ## %bb.3: ## %entry
+; X86_64-NEXT:    shlq %rcx
+; X86_64-NEXT:    movq $128, %rdx
+; X86_64-NEXT:  LBB0_4: ## %entry
+; X86_64-NEXT:    ## =>This Inner Loop Header: Depth=1
+; X86_64-NEXT:    incsspq %rdx
+; X86_64-NEXT:    decq %rcx
+; X86_64-NEXT:    jne LBB0_4
+; X86_64-NEXT:  LBB0_5: ## %entry
+; X86_64-NEXT:    movq (%rax), %rbp
+; X86_64-NEXT:    movq 8(%rax), %rcx
+; X86_64-NEXT:    movq 16(%rax), %rsp
+; X86_64-NEXT:    jmpq *%rcx
+; X86_64-NEXT:    ud2
+;
+; X86-LABEL: bar:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl L_buf$non_lazy_ptr, %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    rdsspd %edx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je LBB0_5
+; X86-NEXT:  ## %bb.1: ## %entry
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    subl %edx, %ecx
+; X86-NEXT:    jbe LBB0_5
+; X86-NEXT:  ## %bb.2: ## %entry
+; X86-NEXT:    shrl $2, %ecx
+; X86-NEXT:    incsspd %ecx
+; X86-NEXT:    shrl $8, %ecx
+; X86-NEXT:    je LBB0_5
+; X86-NEXT:  ## %bb.3: ## %entry
+; X86-NEXT:    shll %ecx
+; X86-NEXT:    movl $128, %edx
+; X86-NEXT:  LBB0_4: ## %entry
+; X86-NEXT:    ## =>This Inner Loop Header: Depth=1
+; X86-NEXT:    incsspd %edx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    jne LBB0_4
+; X86-NEXT:  LBB0_5: ## %entry
+; X86-NEXT:    movl (%eax), %ebp
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl 8(%eax), %esp
+; X86-NEXT:    jmpl *%ecx
+; X86-NEXT:    ud2
+entry:
+  %0 = load i8*, i8** @buf, align 8
+  tail call void @llvm.eh.sjlj.longjmp(i8* %0)
+  unreachable
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*)
+
+; Functions that call SetJmp should save the current ShadowStackPointer for
+; future fixing of the Shadow Stack.
+define i32 @foo(i32 %i) local_unnamed_addr {
+; X86_64-LABEL: foo:
+; X86_64:       ## %bb.0: ## %entry
+; X86_64-NEXT:    pushq %rbp
+; X86_64-NEXT:    .cfi_def_cfa_offset 16
+; X86_64-NEXT:    .cfi_offset %rbp, -16
+; X86_64-NEXT:    movq %rsp, %rbp
+; X86_64-NEXT:    .cfi_def_cfa_register %rbp
+; X86_64-NEXT:    pushq %r15
+; X86_64-NEXT:    pushq %r14
+; X86_64-NEXT:    pushq %r13
+; X86_64-NEXT:    pushq %r12
+; X86_64-NEXT:    pushq %rbx
+; X86_64-NEXT:    pushq %rax
+; X86_64-NEXT:    .cfi_offset %rbx, -56
+; X86_64-NEXT:    .cfi_offset %r12, -48
+; X86_64-NEXT:    .cfi_offset %r13, -40
+; X86_64-NEXT:    .cfi_offset %r14, -32
+; X86_64-NEXT:    .cfi_offset %r15, -24
+; X86_64-NEXT:    ## kill: def $edi killed $edi def $rdi
+; X86_64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X86_64-NEXT:    movq _buf@{{.*}}(%rip), %rax
+; X86_64-NEXT:    movq (%rax), %rax
+; X86_64-NEXT:    movq %rbp, (%rax)
+; X86_64-NEXT:    movq %rsp, 16(%rax)
+; X86_64-NEXT:    leaq {{.*}}(%rip), %rcx
+; X86_64-NEXT:    movq %rcx, 8(%rax)
+; X86_64-NEXT:    xorq %rcx, %rcx
+; X86_64-NEXT:    rdsspq %rcx
+; X86_64-NEXT:    movq %rcx, 24(%rax)
+; X86_64-NEXT:    #EH_SjLj_Setup LBB1_4
+; X86_64-NEXT:  ## %bb.1: ## %entry
+; X86_64-NEXT:    xorl %eax, %eax
+; X86_64-NEXT:    testl %eax, %eax
+; X86_64-NEXT:    jne LBB1_3
+; X86_64-NEXT:    jmp LBB1_5
+; X86_64-NEXT:  LBB1_4: ## Block address taken
+; X86_64-NEXT:    ## %entry
+; X86_64-NEXT:    movl $1, %eax
+; X86_64-NEXT:    testl %eax, %eax
+; X86_64-NEXT:    je LBB1_5
+; X86_64-NEXT:  LBB1_3: ## %if.end
+; X86_64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; X86_64-NEXT:    shll $2, %eax
+; X86_64-NEXT:    leal (%rax,%rax,2), %eax
+; X86_64-NEXT:    addq $8, %rsp
+; X86_64-NEXT:    popq %rbx
+; X86_64-NEXT:    popq %r12
+; X86_64-NEXT:    popq %r13
+; X86_64-NEXT:    popq %r14
+; X86_64-NEXT:    popq %r15
+; X86_64-NEXT:    popq %rbp
+; X86_64-NEXT:    retq
+; X86_64-NEXT:  LBB1_5: ## %if.then
+; X86_64-NEXT:    callq _bar
+; X86_64-NEXT:    ud2
+;
+; X86-LABEL: foo:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl L_buf$non_lazy_ptr, %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movl %ebp, (%eax)
+; X86-NEXT:    movl %esp, 16(%eax)
+; X86-NEXT:    movl $LBB1_4, 4(%eax)
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    rdsspd %ecx
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    #EH_SjLj_Setup LBB1_4
+; X86-NEXT:  ## %bb.1: ## %entry
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne LBB1_3
+; X86-NEXT:    jmp LBB1_5
+; X86-NEXT:  LBB1_4: ## Block address taken
+; X86-NEXT:    ## %entry
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    je LBB1_5
+; X86-NEXT:  LBB1_3: ## %if.end
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+; X86-NEXT:  LBB1_5: ## %if.then
+; X86-NEXT:    calll _bar
+; X86-NEXT:    ud2
+entry:
+  %0 = load i8*, i8** @buf, align 8
+  %1 = bitcast i8* %0 to i8**
+  %2 = tail call i8* @llvm.frameaddress(i32 0)
+  store i8* %2, i8** %1, align 8
+  %3 = tail call i8* @llvm.stacksave()
+  %4 = getelementptr inbounds i8, i8* %0, i64 16
+  %5 = bitcast i8* %4 to i8**
+  store i8* %3, i8** %5, align 8
+  %6 = tail call i32 @llvm.eh.sjlj.setjmp(i8* %0)
+  %tobool = icmp eq i32 %6, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call i32 @bar(i32 undef)
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %add2 = mul nsw i32 %i, 12
+  ret i32 %add2
+}
+
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.stacksave()
+declare i32 @llvm.eh.sjlj.setjmp(i8*)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 4, !"cf-protection-return", i32 1}