[CodeGen] Move ARMCodeGenPrepare to TypePromotion

Convert ARMCodeGenPrepare into a generic type promotion pass by: - Removing the insertion of ARM-specific intrinsics to handle narrow types, as we weren't using this. - Removing ARMSubtarget references. - Querying a generic TLI object to know which types should be promoted and what they should be promoted to. - Moving all codegen tests into the Transforms folder and testing them with opt rather than llc, which is how they should have been written in the first place. The pass searches up from icmp operands in an attempt to safely promote types so we can avoid generating unnecessary unsigned extends during DAG ISel. Differential Revision: https://reviews.llvm.org/D69556
2019-12-03 11:00:32 +00:00 · 2019-12-03 11:00:32 +00:00 · bc76dadb3c
parent 6713670b17
commit bc76dadb3c
31 changed files with 3454 additions and 4053 deletions
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@ -463,6 +463,9 @@ namespace llvm {
  /// Create Hardware Loop pass. \see HardwareLoops.cpp
  FunctionPass *createHardwareLoopsPass();

+  /// Create IR Type Promotion pass. \see TypePromotion.cpp
+  FunctionPass *createTypePromotionPass();
+
 } // End llvm namespace

 #endif
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@ -408,6 +408,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
 void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
 void initializeTwoAddressInstructionPassPass(PassRegistry&);
 void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
+void initializeTypePromotionPass(PassRegistry&);
 void initializeUnifyFunctionExitNodesPass(PassRegistry&);
 void initializeUnpackMachineBundlesPass(PassRegistry&);
 void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&);
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@ -163,6 +163,7 @@ add_llvm_component_library(LLVMCodeGen
  TargetRegisterInfo.cpp
  TargetSchedule.cpp
  TargetSubtargetInfo.cpp
+  TypePromotion.cpp
  TwoAddressInstructionPass.cpp
  UnreachableBlockElim.cpp
  ValueTypes.cpp
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@ -105,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
  initializeTailDuplicatePass(Registry);
  initializeTargetPassConfigPass(Registry);
  initializeTwoAddressInstructionPassPass(Registry);
+  initializeTypePromotionPass(Registry);
  initializeUnpackMachineBundlesPass(Registry);
  initializeUnreachableBlockElimLegacyPassPass(Registry);
  initializeUnreachableMachineBlockElimPass(Registry);
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@ -1,4 +1,4 @@
-//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
+//===----- TypePromotion.cpp ----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@ -7,23 +7,25 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// This pass inserts intrinsics to handle small types that would otherwise be
-/// promoted during legalization. Here we can manually promote types or insert
-/// intrinsics which can handle narrow types that aren't supported by the
-/// register classes.
-//
+/// This is an opcode-based type promotion pass for small types that would
+/// otherwise be promoted during legalisation. This works around the limitations
+/// of SelectionDAG for cyclic regions. The search begins from the operands of
+/// icmp instructions, from which a tree consisting of non-wrapping or safely
+/// wrapping instructions is built, checked and promoted if possible.
+///
 //===----------------------------------------------------------------------===//

-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@ -32,26 +34,19 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"

-#define DEBUG_TYPE "arm-codegenprepare"
+#define DEBUG_TYPE "type-promotion"
+#define PASS_NAME "Type Promotion"

 using namespace llvm;

 static cl::opt<bool>
-DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true),
-           cl::desc("Disable ARM specific CodeGenPrepare pass"));
-
-static cl::opt<bool>
-EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false),
-          cl::desc("Use DSP instructions for scalar operations"));
-
-static cl::opt<bool>
-EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
-                   cl::desc("Use DSP instructions for scalar operations\
-                            with immediate operands"));
+DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(true),
+                 cl::desc("Disable type promotion pass"));

 // The goal of this pass is to enable more efficient code generation for
 // operations on narrow types (i.e. types with < 32-bits) and this is a
@ -111,7 +106,6 @@ class IRPromoter {
  SmallPtrSet<Instruction*, 4> InstsToRemove;
  DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
  SmallPtrSet<Value*, 8> Promoted;
-  Module *M = nullptr;
  LLVMContext &Ctx;
  // The type we promote to: always i32
  IntegerType *ExtTy = nullptr;
@ -134,11 +128,10 @@ class IRPromoter {
  void Cleanup(void);

 public:
-  IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
-                          ExtTy(Type::getInt32Ty(Ctx)) { }
+  IRPromoter(Module *M) : Ctx(M->getContext()) { }


-  void Mutate(Type *OrigTy,
+  void Mutate(Type *OrigTy, unsigned PromotedWidth,
              SetVector<Value*> &Visited,
              SmallPtrSetImpl<Value*> &Sources,
              SmallPtrSetImpl<Instruction*> &Sinks,
@ -146,30 +139,29 @@ public:
              SmallPtrSetImpl<Instruction*> &SafeWrap);
 };

-class ARMCodeGenPrepare : public FunctionPass {
-  const ARMSubtarget *ST = nullptr;
+class TypePromotion : public FunctionPass {
  IRPromoter *Promoter = nullptr;
-  std::set<Value*> AllVisited;
+  SmallPtrSet<Value*, 16> AllVisited;
  SmallPtrSet<Instruction*, 8> SafeToPromote;
  SmallPtrSet<Instruction*, 4> SafeWrap;

  bool isSafeWrap(Instruction *I);
  bool isSupportedValue(Value *V);
  bool isLegalToPromote(Value *V);
-  bool TryToPromote(Value *V);
+  bool TryToPromote(Value *V, unsigned PromotedWidth);

 public:
  static char ID;
  static unsigned TypeSize;
  Type *OrigTy = nullptr;

-  ARMCodeGenPrepare() : FunctionPass(ID) {}
+  TypePromotion() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
  }

-  StringRef getPassName() const override { return "ARM IR optimizations"; }
+  StringRef getPassName() const override { return PASS_NAME; }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
@ -188,19 +180,19 @@ static bool GenerateSignBits(Value *V) {
 }

 static bool EqualTypeSize(Value *V) {
-  return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
+  return V->getType()->getScalarSizeInBits() == TypePromotion::TypeSize;
 }

 static bool LessOrEqualTypeSize(Value *V) {
-  return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
+  return V->getType()->getScalarSizeInBits() <= TypePromotion::TypeSize;
 }

 static bool GreaterThanTypeSize(Value *V) {
-  return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
+  return V->getType()->getScalarSizeInBits() > TypePromotion::TypeSize;
 }

 static bool LessThanTypeSize(Value *V) {
-  return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
+  return V->getType()->getScalarSizeInBits() < TypePromotion::TypeSize;
 }

 /// Some instructions can use 8- and 16-bit operands, and we don't need to
@ -278,7 +270,7 @@ static bool isSink(Value *V) {
 }

 /// Return whether this instruction can safely wrap.
-bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
+bool TypePromotion::isSafeWrap(Instruction *I) {
  // We can support a, potentially, wrapping instruction (I) if:
  // - It is only used by an unsigned icmp.
  // - The icmp uses a constant.
@ -374,7 +366,7 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
  Total += OverflowConst->getValue().getBitWidth() < 32 ?
    OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();

-  APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
+  APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);

  if (Total.getBitWidth() > Max.getBitWidth()) {
    if (Total.ugt(Max.zext(Total.getBitWidth())))
@ -385,7 +377,7 @@ bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
  } else if (Total.ugt(Max))
    return false;

-  LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " << *I << "\n");
  SafeWrap.insert(I);
  return true;
 }
@ -422,32 +414,12 @@ static bool isPromotedResultSafe(Value *V) {
  return cast<Instruction>(V)->hasNoUnsignedWrap();
 }

-/// Return the intrinsic for the instruction that can perform the same
-/// operation but on a narrow type. This is using the parallel dsp intrinsics
-/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
-  // Whether we use the signed or unsigned versions of these intrinsics
-  // doesn't matter because we're not using the GE bits that they set in
-  // the APSR.
-  switch(I->getOpcode()) {
-  default:
-    break;
-  case Instruction::Add:
-    return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
-      Intrinsic::arm_uadd8;
-  case Instruction::Sub:
-    return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
-      Intrinsic::arm_usub8;
-  }
-  llvm_unreachable("unhandled opcode for narrow intrinsic");
-}
-
 void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
  SmallVector<Instruction*, 4> Users;
  Instruction *InstTo = dyn_cast<Instruction>(To);
  bool ReplacedAll = true;

-  LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
+  LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
             << "\n");

  for (Use &U : From->uses()) {
@ -468,7 +440,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
 }

 void IRPromoter::PrepareWrappingAdds() {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
  IRBuilder<> Builder{Ctx};

  // For adds that safely wrap and use a negative immediate as operand 1, we
@ -479,7 +451,7 @@ void IRPromoter::PrepareWrappingAdds() {
    if (I->getOpcode() != Instruction::Add)
      continue;

-    LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+    LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
    assert((isa<ConstantInt>(I->getOperand(1)) &&
            cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
           "Wrapping should have a negative immediate as the second operand");
@ -494,7 +466,7 @@ void IRPromoter::PrepareWrappingAdds() {
    }
    InstsToRemove.insert(I);
    I->replaceAllUsesWith(NewVal);
-    LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
+    LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
  }
  for (auto *I : NewInsts)
    Visited->insert(I);
@ -505,7 +477,7 @@ void IRPromoter::ExtendSources() {

  auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
    assert(V->getType() != ExtTy && "zext already extends to i32");
-    LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
+    LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
    Builder.SetInsertPoint(InsertPt);
    if (auto *I = dyn_cast<Instruction>(V))
      Builder.SetCurrentDebugLocation(I->getDebugLoc());
@ -523,7 +495,7 @@ void IRPromoter::ExtendSources() {
  };

  // Now, insert extending instructions between the sources and their users.
-  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
  for (auto V : *Sources) {
    LLVM_DEBUG(dbgs() << " - " << *V << "\n");
    if (auto *I = dyn_cast<Instruction>(V))
@ -539,7 +511,7 @@ void IRPromoter::ExtendSources() {
 }

 void IRPromoter::PromoteTree() {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");

  IRBuilder<> Builder{Ctx};

@ -570,38 +542,10 @@ void IRPromoter::PromoteTree() {
      Promoted.insert(I);
    }
  }
-
-  // Finally, any instructions that should be promoted but haven't yet been,
-  // need to be handled using intrinsics.
-  for (auto *V : *Visited) {
-    auto *I = dyn_cast<Instruction>(V);
-    if (!I)
-      continue;
-
-    if (Sources->count(I) || Sinks->count(I))
-      continue;
-
-    if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
-      continue;
-
-    assert(EnableDSP && "DSP intrinisc insertion not enabled!");
-
-    // Replace unsafe instructions with appropriate intrinsic calls.
-    LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
-               << *I << "\n");
-    Function *DSPInst =
-      Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
-    Builder.SetInsertPoint(I);
-    Builder.SetCurrentDebugLocation(I->getDebugLoc());
-    Value *Args[] = { I->getOperand(0), I->getOperand(1) };
-    CallInst *Call = Builder.CreateCall(DSPInst, Args);
-    NewInsts.insert(Call);
-    ReplaceAllUsersOfWith(I, Call);
-  }
 }

 void IRPromoter::TruncateSinks() {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");

  IRBuilder<> Builder{Ctx};

@ -612,7 +556,7 @@ void IRPromoter::TruncateSinks() {
    if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
      return nullptr;

-    LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+    LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
               << *V << "\n");
    Builder.SetInsertPoint(cast<Instruction>(V));
    auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
@ -624,7 +568,7 @@ void IRPromoter::TruncateSinks() {
  // Fix up any stores or returns that use the results of the promoted
  // chain.
  for (auto I : *Sinks) {
-    LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
+    LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");

    // Handle calls separately as we need to iterate over arg operands.
    if (auto *Call = dyn_cast<CallInst>(I)) {
@ -661,7 +605,7 @@ void IRPromoter::TruncateSinks() {
 }

 void IRPromoter::Cleanup() {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
  // Some zexts will now have become redundant, along with their trunc
  // operands, so remove them
  for (auto V : *Visited) {
@ -674,7 +618,7 @@ void IRPromoter::Cleanup() {

    Value *Src = ZExt->getOperand(0);
    if (ZExt->getSrcTy() == ZExt->getDestTy()) {
-      LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
+      LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
                 << "\n");
      ReplaceAllUsersOfWith(ZExt, Src);
      continue;
@ -693,7 +637,7 @@ void IRPromoter::Cleanup() {
  }

  for (auto *I : InstsToRemove) {
-    LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
+    LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
    I->dropAllReferences();
    I->eraseFromParent();
  }
@ -707,7 +651,7 @@ void IRPromoter::Cleanup() {
 }

 void IRPromoter::ConvertTruncs() {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
  IRBuilder<> Builder{Ctx};

  for (auto *V : *Visited) {
@ -731,17 +675,18 @@ void IRPromoter::ConvertTruncs() {
  }
 }

-void IRPromoter::Mutate(Type *OrigTy,
+void IRPromoter::Mutate(Type *OrigTy, unsigned PromotedWidth,
                        SetVector<Value*> &Visited,
                        SmallPtrSetImpl<Value*> &Sources,
                        SmallPtrSetImpl<Instruction*> &Sinks,
                        SmallPtrSetImpl<Instruction*> &SafeToPromote,
                        SmallPtrSetImpl<Instruction*> &SafeWrap) {
-  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
-             << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to from "
+             << TypePromotion::TypeSize << " to 32-bits\n");

  assert(isa<IntegerType>(OrigTy) && "expected integer type");
  this->OrigTy = cast<IntegerType>(OrigTy);
+  ExtTy = IntegerType::get(Ctx, PromotedWidth);
  assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
         "original type not smaller than extended type");

@ -779,9 +724,7 @@ void IRPromoter::Mutate(Type *OrigTy,
  // Insert zext instructions between sources and their users.
  ExtendSources();

-  // Promote visited instructions, mutating their types in place. Also insert
-  // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
-  // promote.
+  // Promote visited instructions, mutating their types in place.
  PromoteTree();

  // Convert any truncs, that aren't sources, into AND masks.
@ -794,14 +737,14 @@ void IRPromoter::Mutate(Type *OrigTy,
  // clear the data structures.
  Cleanup();

-  LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
 }

 /// We accept most instructions, as well as Arguments and ConstantInsts. We
 /// Disallow casts other than zext and truncs and only allow calls if their
 /// return value is zeroext. We don't allow opcodes that can introduce sign
 /// bits.
-bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
+bool TypePromotion::isSupportedValue(Value *V) {
  if (auto *I = dyn_cast<Instruction>(V)) {
    switch (I->getOpcode()) {
    default:
@ -849,7 +792,7 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
 /// Check that the type of V would be promoted and that the original type is
 /// smaller than the targeted promoted type. Check that we're not trying to
 /// promote something larger than our base 'TypeSize' type.
-bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
+bool TypePromotion::isLegalToPromote(Value *V) {

  auto *I = dyn_cast<Instruction>(V);
  if (!I)
@ -862,47 +805,20 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
    SafeToPromote.insert(I);
    return true;
  }
-
-  if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
-    return false;
-
-  // If promotion is not safe, can we use a DSP instruction to natively
-  // handle the narrow type?
-  if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
-    return false;
-
-  if (ST->isThumb() && !ST->hasThumb2())
-    return false;
-
-  // TODO
-  // Would it be profitable? For Thumb code, these parallel DSP instructions
-  // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
-  // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
-  // halved. They also do not take immediates as operands.
-  for (auto &Op : I->operands()) {
-    if (isa<Constant>(Op)) {
-      if (!EnableDSPWithImms)
-        return false;
-    }
-  }
-  LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
-  return true;
+  return false;
 }

-bool ARMCodeGenPrepare::TryToPromote(Value *V) {
+bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
  OrigTy = V->getType();
  TypeSize = OrigTy->getPrimitiveSizeInBits();
-  if (TypeSize > 16 || TypeSize < 8)
-    return false;
-
  SafeToPromote.clear();
  SafeWrap.clear();

  if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
    return false;

-  LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
-             << TypeSize << "\n");
+  LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
+             << TypeSize << " bits to " << PromotedWidth << "\n");

  SetVector<Value*> WorkList;
  SmallPtrSet<Value*, 8> Sources;
@ -923,7 +839,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
      return true;

    if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
-      LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
+      LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
      return false;
    }

@ -979,7 +895,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
    }
  }

-  LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
+  LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n";
             for (auto *I : CurrentVisited)
               I->dump();
             );
@ -995,28 +911,31 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
  if (ToPromote < 2)
    return false;

-  Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
-                   SafeWrap);
+  Promoter->Mutate(OrigTy, PromotedWidth, CurrentVisited, Sources, Sinks,
+                   SafeToPromote, SafeWrap);
  return true;
 }

-bool ARMCodeGenPrepare::doInitialization(Module &M) {
+bool TypePromotion::doInitialization(Module &M) {
  Promoter = new IRPromoter(&M);
  return false;
 }

-bool ARMCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F) || DisableCGP)
+bool TypePromotion::runOnFunction(Function &F) {
+  if (skipFunction(F) || DisablePromotion)
    return false;

-  auto *TPC = &getAnalysis<TargetPassConfig>();
+  LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
+
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

-  const TargetMachine &TM = TPC->getTM<TargetMachine>();
-  ST = &TM.getSubtarget<ARMSubtarget>(F);
  bool MadeChange = false;
-  LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n");
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  const TargetMachine &TM = TPC->getTM<TargetMachine>();
+  const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
+  const TargetLowering *TLI = SubtargetInfo->getTargetLowering();

  // Search up from icmps to try to promote their operands.
  for (BasicBlock &BB : F) {
@ -1025,18 +944,30 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
      if (AllVisited.count(&I))
        continue;

-      if (isa<ICmpInst>(I)) {
-        auto &CI = cast<ICmpInst>(I);
+      if (!isa<ICmpInst>(&I))
+        continue;

-        // Skip signed or pointer compares
-        if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
-          continue;
+      auto *ICmp = cast<ICmpInst>(&I);
+      // Skip signed or pointer compares
+      if (ICmp->isSigned() ||
+          !isa<IntegerType>(ICmp->getOperand(0)->getType()))
+        continue;

-        LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
+      LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");

-        for (auto &Op : CI.operands()) {
-          if (auto *I = dyn_cast<Instruction>(Op))
-            MadeChange |= TryToPromote(I);
+      for (auto &Op : ICmp->operands()) {
+        if (auto *I = dyn_cast<Instruction>(Op)) {
+          EVT SrcVT = TLI->getValueType(DL, I->getType());
+          if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
+            break;
+
+          if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
+              TargetLowering::TypePromoteInteger)
+            break;
+
+          EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
+          MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits());
+          break;
        }
      }
    }
@ -1046,24 +977,22 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
               });
  }
  if (MadeChange)
-    LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n");
+    LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");

  return MadeChange;
 }

-bool ARMCodeGenPrepare::doFinalization(Module &M) {
+bool TypePromotion::doFinalization(Module &M) {
  delete Promoter;
  return false;
 }

-INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE,
-                      "ARM IR optimizations", false, false)
-INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
-                    false, false)
+INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)

-char ARMCodeGenPrepare::ID = 0;
-unsigned ARMCodeGenPrepare::TypeSize = 0;
+char TypePromotion::ID = 0;
+unsigned TypePromotion::TypeSize = 0;

-FunctionPass *llvm::createARMCodeGenPreparePass() {
-  return new ARMCodeGenPrepare();
+FunctionPass *llvm::createTypePromotionPass() {
+  return new TypePromotion();
 }
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@ -43,7 +43,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
 FunctionPass *createA15SDOptimizerPass();
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMCodeGenPreparePass();
 FunctionPass *createARMConstantIslandPass();
 FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
@ -61,7 +60,6 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
 void initializeARMParallelDSPPass(PassRegistry &);
 void initializeARMLoadStoreOptPass(PassRegistry &);
 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
-void initializeARMCodeGenPreparePass(PassRegistry &);
 void initializeARMConstantIslandsPass(PassRegistry &);
 void initializeARMExpandPseudoPass(PassRegistry &);
 void initializeThumb2SizeReducePass(PassRegistry &);
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@ -91,7 +91,6 @@ extern "C" void LLVMInitializeARMTarget() {
  initializeARMLoadStoreOptPass(Registry);
  initializeARMPreAllocLoadStoreOptPass(Registry);
  initializeARMParallelDSPPass(Registry);
-  initializeARMCodeGenPreparePass(Registry);
  initializeARMConstantIslandsPass(Registry);
  initializeARMExecutionDomainFixPass(Registry);
  initializeARMExpandPseudoPass(Registry);
@ -422,7 +421,7 @@ void ARMPassConfig::addIRPasses() {

 void ARMPassConfig::addCodeGenPrepare() {
  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createARMCodeGenPreparePass());
+    addPass(createTypePromotionPass());
  TargetPassConfig::addCodeGenPrepare();
 }

--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@ -25,7 +25,6 @@ add_llvm_target(ARMCodeGen
  ARMBasicBlockInfo.cpp
  ARMCallingConv.cpp
  ARMCallLowering.cpp
-  ARMCodeGenPrepare.cpp
  ARMConstantIslandPass.cpp
  ARMConstantPoolValue.cpp
  ARMExpandPseudoInsts.cpp
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-calls.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-calls.ll
@ -1,230 +0,0 @@
-; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-
-; Check that the pass doesn't try to promote the immediate parameters.
-; CHECK-LABEL: call_with_imms
-; CHECK-NOT:   uxt
-define i8 @call_with_imms(i8* %arg) {
-  %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0)
-  %cmp = icmp eq i8 %call, 0
-  %res = select i1 %cmp, i8 %call, i8 1
-  ret i8 %res
-}
-
-; Test that the call result is still extended.
-; CHECK-LABEL: test_call:
-; CHECK: bl
-; CHECK-NEXT: sxtb r1, r0
-define i16 @test_call(i8 zeroext %arg) {
-  %call = call i8 @dummy_i8(i8 %arg)
-  %cmp = icmp ult i8 %call, 128
-  %conv = zext i1 %cmp to i16
-  ret i16 %conv
-}
-
-; CHECK-LABEL: promote_i8_sink_i16_1
-; CHECK: bl dummy_i8
-; CHECK: add{{.*}} r0, #1
-; CHECK-NOT: uxt
-; CHECK: cmp r0
-define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) {
-  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
-  %add = add nuw i8 %call, 1
-  %conv = zext i8 %add to i16
-  %cmp = icmp ne i16 %conv, %arg1
-  %sel = select i1 %cmp, i16 %arg1, i16 %arg2
-  %res = tail call zeroext i16 @dummy3(i16 %sel)
-  ret i16 %res
-}
-
-; CHECK-LABEL: promote_i8_sink_i16_2
-; CHECK: bl dummy_i8
-; CHECK: add{{.*}} r0, #1
-; CHECK-NOT: uxt
-; CHECK: cmp r0
-define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) {
-  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
-  %add = add nuw i8 %call, 1
-  %cmp = icmp ne i8 %add, %arg1
-  %conv = zext i8 %arg1 to i16
-  %sel = select i1 %cmp, i16 %conv, i16 %arg2
-  %res = tail call zeroext i16 @dummy3(i16 %sel)
-  ret i16 %res
-}
-
-@uc = global i8 42, align 1
-@LL = global i64 0, align 8
-
-; CHECK-LABEL: zext_i64
-; CHECK: ldrb
-; CHECK: strd
-define void @zext_i64() {
-entry:
-  %0 = load i8, i8* @uc, align 1
-  %conv = zext i8 %0 to i64
-  store i64 %conv, i64* @LL, align 8
-  %cmp = icmp eq i8 %0, 42
-  %conv1 = zext i1 %cmp to i32
-  %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1)
-  ret void
-}
-
-@a = global i16* null, align 4
-@b = global i32 0, align 4
-
-; CHECK-LABEL: constexpr
-; CHECK: uxth
-define i32 @constexpr() {
-entry:
-  store i32 ptrtoint (i32* @b to i32), i32* @b, align 4
-  %0 = load i16*, i16** @a, align 4
-  %1 = load i16, i16* %0, align 2
-  %or = or i16 %1, ptrtoint (i32* @b to i16)
-  store i16 %or, i16* %0, align 2
-  %cmp = icmp ne i16 %or, 4
-  %conv3 = zext i1 %cmp to i32
-  %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2
-  ret i32 undef
-}
-
-; The call to safe_lshift_func takes two parameters, but they're the same value
-; just one is zext. We do support zext now, so the transformation should
-; trigger and we don't want see uxtb here.
-; CHECK-LABEL: call_zext_i8_i32
-; CHECK-NOT: uxt
-define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) {
-for.cond8.preheader:
-  %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef)
-  %tobool219 = icmp eq i8 %call217, 0
-  br i1 %tobool219, label %for.end411, label %for.cond273.preheader
-
-for.cond273.preheader:                            ; preds = %for.cond8.preheader
-  %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ]
-  %conv218.le = zext i8 %call217.lcssa to i32
-  %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le)
-  unreachable
-
-for.end411:                                       ; preds = %for.cond8.preheader
-  %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4)
-  unreachable
-}
-
-%struct.anon = type { i32 }
-
-@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
-@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
-@g_82 = hidden local_unnamed_addr global i32 0, align 4
-
-; Test that the transform bails on finding %conv4, a trunc
-; CHECK-LABEL: call_return_pointer
-; CHECK: sxth
-; CHECK: uxt
-define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 {
-entry:
-  %conv1 = zext i8 %p_13 to i16
-  %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef)
-  %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4
-  %conv2 = trunc i32 %0 to i16
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.cond.backedge, %entry
-  %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ]
-  %tobool = icmp eq i8 %p_13.addr.0, 0
-  br i1 %tobool, label %for.cond.backedge, label %if.then
-
-for.cond.backedge:                                ; preds = %for.cond, %if.then
-  %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ]
-  br label %for.cond
-
-if.then:                                          ; preds = %for.cond
-  %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2)
-  %conv4 = trunc i16 %call3 to i8
-  br label %for.cond.backedge
-}
-
-; Check that d.sroa.0.0.be is promoted passed directly into the tail call.
-; CHECK-LABEL: check_zext_phi_call_arg
-; CHECK-NOT: uxt
-define i32 @check_zext_phi_call_arg() {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.cond.backedge, %entry
-  %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ]
-  %tobool = icmp eq i16 %d.sroa.0.0, 0
-  br i1 %tobool, label %for.cond.backedge, label %if.then
-
-for.cond.backedge:                                ; preds = %for.cond, %if.then
-  %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ]
-  br label %for.cond
-
-if.then:                                          ; preds = %for.cond
-  %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32
-  %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2
-  br label %for.cond.backedge
-}
-
-%struct.atomic_flag = type { i8 }
-
-; CHECK-LABEL: atomic_flag_test_and_set
-; CHECK-NOT: uxt
-define zeroext i1 @atomic_flag_test_and_set(%struct.atomic_flag* %object) {
-entry:
-  %_Value = getelementptr inbounds %struct.atomic_flag, %struct.atomic_flag* %object, i32 0, i32 0
-  %call = tail call arm_aapcscc zeroext i8 @__atomic_exchange_1(i8* %_Value, i8 zeroext 1, i32 5) #1
-  %0 = and i8 %call, 1
-  %tobool = icmp ne i8 %0, 0
-  ret i1 %tobool
-}
-
-; CHECK-LABEL: i1_zeroext_call
-; CHECK: uxt
-define i1 @i1_zeroext_call(i16* %ts, i32 %a, i16* %b, i8* %c) {
-entry:
-  %0 = load i16, i16* %ts, align 2
-  %conv.i860 = trunc i32 %a to i16
-  store i16 %conv.i860, i16* %b, align 2
-  %call.i848 = call zeroext i1 @i1_zeroext(i8* %c, i32 64, i16 zeroext %conv.i860)
-  br i1 %call.i848, label %if.then223, label %if.else227
-
-if.then223:
-  %cmp235 = icmp eq i16 %0, %conv.i860
-  br label %exit
-
-if.else227:
-  %cmp236 = icmp ult i16 %0, %conv.i860
-  br label %exit
-
-exit:
-  %retval = phi i1 [ %cmp235, %if.then223 ], [ %cmp236, %if.else227 ]
-  ret i1 %retval
-}
-
-; CHECK-LABEL: promote_arg_pass_to_call
-; CHECK: uxtb
-define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) {
-  %conv = add nuw i16 %arg1, 15
-  %mul = mul nuw nsw i16 %conv, 3
-  %cmp = icmp ult i16 %mul, %arg2
-  %trunc = trunc i16 %arg1 to i8
-  %res = call zeroext i16 @dummy4(i1 %cmp, i8 %trunc, i16 %arg1)
-  ret i16 %res
-}
-
-
-declare i32 @assert(...)
-declare i8 @dummy_i8(i8)
-declare i8 @dummy2(i8*, i8, i8)
-declare i16 @dummy3(i16)
-declare i16 @dummy4(i1, i8, i16)
-
-declare dso_local i32 @e(...) local_unnamed_addr #1
-declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
-declare dso_local arm_aapcscc i8 @__atomic_exchange_1(i8*, i8, i32) local_unnamed_addr
-
-declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66)
-declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2)
-declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64)
-declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32)
-declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext)
-declare i1 @i1_zeroext(i8*, i32, i16 zeroext)
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-casts.ll
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll
@ -1,332 +0,0 @@
-; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
-; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
-
-; CHECK-COMMON-LABEL: test_ult_254_inc_imm:
-; CHECK-DSP:        adds    r0, #1
-; CHECK-DSP-NEXT:   uxtb    r1, r0
-; CHECK-DSP-NEXT:   movs    r0, #47
-; CHECK-DSP-NEXT:   cmp     r1, #254
-; CHECK-DSP-NEXT:   it      lo
-; CHECK-DSP-NEXT:   movlo   r0, #35
-
-; CHECK-DSP-IMM:      movs r1, #1
-; CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1
-; CHECK-DSP-IMM-NEXT: movs  r0, #47
-; CHECK-DSP-IMM-NEXT: cmp r1, #254
-; CHECK-DSP-IMM-NEXT: it  lo
-; CHECK-DSP-IMM-NEXT: movlo r0, #35
-define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, 1
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_slt_254_inc_imm
-; CHECK-COMMON: adds
-; CHECK-COMMON: sxtb
-define i32 @test_slt_254_inc_imm(i8 signext %x) {
-entry:
-  %add = add i8 %x, 1
-  %cmp = icmp slt i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ult_254_inc_var:
-; CHECK-NODSP:      add     r0, r1
-; CHECK-NODSP-NEXT: uxtb    r1, r0
-; CHECK-NODSP-NEXT: movs    r0, #47
-; CHECK-NODSP-NEXT: cmp     r1, #254
-; CHECK-NODSP-NEXT: it      lo
-; CHECK-NODSP-NEXT: movlo   r0, #35
-
-; CHECK-DSP:        uadd8   r1, r0, r1
-; CHECK-DSP-NEXT:   movs    r0, #47
-; CHECK-DSP-NEXT:   cmp     r1, #254
-; CHECK-DSP-NEXT:   it      lo
-; CHECK-DSP-NEXT:   movlo   r0, #35
-define i32 @test_ult_254_inc_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %add = add i8 %x, %y
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sle_254_inc_var
-; CHECK-COMMON: add
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sle_254_inc_var(i8 %x, i8 %y) {
-entry:
-  %add = add i8 %x, %y
-  %cmp = icmp sle i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ugt_1_dec_imm:
-; CHECK-COMMON:      subs    r1, r0, #1
-; CHECK-COMMON-NEXT: movs    r0, #47
-; CHECK-COMMON-NEXT: cmp     r1, #1
-; CHECK-COMMON-NEXT: it      hi
-; CHECK-COMMON-NEXT: movhi   r0, #35
-define i32 @test_ugt_1_dec_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp ugt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sgt_1_dec_imm
-; CHECK-COMMON: subs
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sgt_1_dec_imm(i8 %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp sgt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_ugt_1_dec_var:
-; CHECK-NODSP:      subs    r0, r0, r1
-; CHECK-NODSP-NEXT: uxtb    r1, r0
-; CHECK-NODSP-NEXT: movs    r0, #47
-; CHECK-NODSP-NEXT: cmp     r1, #1
-; CHECK-NODSP-NEXT: it      hi
-; CHECK-NODSP-NEXT: movhi   r0, #35
-
-; CHECK-DSP:      usub8   r1, r0, r1
-; CHECK-DSP-NEXT: movs    r0, #47
-; CHECK-DSP-NEXT: cmp     r1, #1
-; CHECK-DSP-NEXT: it      hi
-; CHECK-DSP-NEXT: movhi   r0, #35
-define i32 @test_ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp ugt i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: test_sge_1_dec_var
-; CHECK-COMMON: sub
-; CHECK-COMMON: sxtb
-; CHECK-COMMON: cmp
-define i32 @test_sge_1_dec_var(i8 %x, i8 %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp sge i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: dsp_imm1:
-; CHECK-DSP:      eors    r1, r0
-; CHECK-DSP-NEXT: and     r0, r0, #7
-; CHECK-DSP-NEXT: subs    r0, r0, r1
-; CHECK-DSP-NEXT: adds    r0, #1
-; CHECK-DSP-NEXT: uxtb    r1, r0
-; CHECK-DSP-NEXT: movs    r0, #47
-; CHECK-DSP-NEXT: cmp     r1, #254
-; CHECK-DSP-NEXT: it      lo
-; CHECK-DSP-NEXT: movlo   r0, #35
-
-; CHECK-DSP-IMM:      eors    r1, r0
-; CHECK-DSP-IMM-NEXT: and     r0, r0, #7
-; CHECK-DSP-IMM-NEXT: usub8   r0, r0, r1
-; CHECK-DSP-IMM-NEXT: movs    r1, #1
-; CHECK-DSP-IMM-NEXT: uadd8   r1, r0, r1
-; CHECK-DSP-IMM-NEXT: movs    r0, #47
-; CHECK-DSP-IMM-NEXT: cmp     r1, #254
-; CHECK-DSP-IMM-NEXT: it      lo
-; CHECK-DSP-IMM-NEXT: movlo   r0, #35
-define i32 @dsp_imm1(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %xor = xor i8 %x, %y
-  %and = and i8 %x, 7
-  %sub = sub i8 %and, %xor
-  %add = add i8 %sub, 1
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: dsp_var:
-; CHECK-COMMON:   eors    r1, r0
-; CHECK-COMMON:   and     r2, r0, #7
-; CHECK-NODSP:    subs    r1, r2, r1
-; CHECK-NODSP:    add.w   r0, r1, r0, lsl #1
-; CHECK-NODSP:    uxtb    r1, r0
-; CHECK-DSP:      usub8   r1, r2, r1
-; CHECK-DSP:      lsls    r0, r0, #1
-; CHECK-DSP:      uadd8   r1, r1, r0
-; CHECK-DSP-NOT:  uxt
-; CHECK-COMMON:   movs    r0, #47
-; CHECK-COMMON:   cmp     r1, #254
-; CHECK-COMMON:   it      lo
-; CHECK-COMMON:   movlo   r0, #35
-define i32 @dsp_var(i8 zeroext %x, i8 zeroext %y) {
-  %xor = xor i8 %x, %y
-  %and = and i8 %x, 7
-  %sub = sub i8 %and, %xor
-  %mul = shl nuw i8 %x, 1
-  %add = add i8 %sub, %mul
-  %cmp = icmp ult i8 %add, 254
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: store_dsp_res
-; CHECK-DSP: usub8 
-; CHECK-DSP: strb
-define void @store_dsp_res(i8* %in, i8* %out, i8 %compare) {
-  %first = getelementptr inbounds i8, i8* %in, i32 0
-  %second = getelementptr inbounds i8, i8* %in, i32 1
-  %ld0 = load i8, i8* %first
-  %ld1 = load i8, i8* %second
-  %xor = xor i8 %ld0, -1
-  %cmp = icmp ult i8 %compare, %ld1
-  %select = select i1 %cmp, i8 %compare, i8 %xor
-  %sub = sub i8 %ld0, %select
-  store i8 %sub, i8* %out, align 1
-  ret void
-}
-
-; CHECK-COMMON-LABEL: ugt_1_dec_imm:
-; CHECK-COMMON:      subs    r1, r0, #1
-; CHECK-COMMON-NEXT: movs    r0, #47
-; CHECK-COMMON-NEXT: cmp     r1, #1
-; CHECK-COMMON-NEXT: it      hi
-; CHECK-COMMON-NEXT: movhi   r0, #35
-define i32 @ugt_1_dec_imm(i8 zeroext %x) {
-entry:
-  %add = add i8 %x, -1
-  %cmp = icmp ugt i8 %add, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: ugt_1_dec_var:
-; CHECK-NODSP:      subs    r0, r0, r1
-; CHECK-NODSP-NEXT: uxtb    r1, r0
-; CHECK-NODSP-NEXT: movs    r0, #47
-; CHECK-NODSP-NEXT: cmp     r1, #1
-; CHECK-NODSP-NEXT: it      hi
-; CHECK-NODSP-NEXT: movhi   r0, #35
-
-; CHECK-DSP:      usub8   r1, r0, r1
-; CHECK-DSP-NEXT: movs    r0, #47
-; CHECK-DSP-NEXT: cmp     r1, #1
-; CHECK-DSP-NEXT: it      hi
-; CHECK-DSP-NEXT: movhi   r0, #35
-define i32 @ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
-entry:
-  %sub = sub i8 %x, %y
-  %cmp = icmp ugt i8 %sub, 1
-  %res = select i1 %cmp, i32 35, i32 47
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_eq_minus_one
-; CHECK-COMMON: cmp {{r[0-9]+}}, #255
-define i32 @icmp_eq_minus_one(i8* %ptr) {
-  %load = load i8, i8* %ptr, align 1
-  %conv = zext i8 %load to i32
-  %cmp = icmp eq i8 %load, -1
-  %ret = select i1 %cmp, i32 %conv, i32 -1
-  ret i32 %ret
-}
-
-; CHECK-COMMON-LABEL: icmp_not
-; CHECK-COMMON: movw r2, #65535
-; CHECK-COMMON: eors r2, r0
-; CHECK-COMMON: movs r0, #32
-; CHECK-COMMON: cmp r2, r1
-define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) {
-  %not = xor i16 %arg0, -1
-  %cmp = icmp eq i16 %not, %arg1
-  %res = select i1 %cmp, i32 16, i32 32
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i1
-; CHECK-NOT: uxt
-define i32 @icmp_i1(i1* %arg0, i1 zeroext %arg1, i32 %a, i32 %b) {
-entry:
-  %load = load i1, i1* %arg0
-  %not = xor i1 %load, 1
-  %cmp = icmp eq i1 %arg1, %not
-  %res = select i1 %cmp, i32 %a, i32 %b
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i7
-; CHECK-COMMON: ldrb
-; CHECK-COMMON: cmp
-define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) {
-entry:
-  %load = load i7, i7* %arg0
-  %add = add nuw i7 %load, 1
-  %cmp = icmp ult i7 %arg1, %add
-  %res = select i1 %cmp, i32 %a, i32 %b
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_i15
-; CHECK-COMMON: movw [[MINUS_ONE:r[0-9]+]], #32767
-define i32 @icmp_i15(i15 zeroext %arg0, i15 zeroext %arg1) {
-  %xor = xor i15 %arg0, -1
-  %cmp = icmp eq i15 %xor, %arg1
-  %res = select i1 %cmp, i32 21, i32 42
-  ret i32 %res
-}
-
-; CHECK-COMMON-LABEL: icmp_minus_imm
-; CHECK-NODSP: subs [[SUB:r[0-9]+]],
-; CHECK-NODSP: uxtb [[UXT:r[0-9]+]],
-; CHECK-NODSP: cmp [[UXT]], #251
-
-; CHECK-DSP: subs [[SUB:r[0-9]+]],
-; CHECK-DSP: uxtb [[UXT:r[0-9]+]],
-; CHECK-DSP: cmp [[UXT]], #251
-
-; CHECK-DSP-IMM: ldrb [[A:r[0-9]+]],
-; CHECK-DSP-IMM: movs  [[MINUS_7:r[0-9]+]], #249
-; CHECK-DSP-IMM: uadd8 [[RES:r[0-9]+]], [[A]], [[MINUS_7]]
-; CHECK-DSP-IMM: cmp [[RES]], #251
-define i32 @icmp_minus_imm(i8* %a) {
-entry:
-  %0 = load i8, i8* %a, align 1
-  %add.i = add i8 %0, -7
-  %cmp = icmp ugt i8 %add.i, -5
-  %conv1 = zext i1 %cmp to i32
-  ret i32 %conv1
-}
-
-; CHECK-COMMON-LABEL: mul_with_neg_imm
-; CHECK-COMMON-NOT: uxtb
-; CHECK-COMMON:     and [[BIT0:r[0-9]+]], r0, #1
-; CHECK-COMMON:     add.w [[MUL32:r[0-9]+]], [[BIT0]], [[BIT0]], lsl #5
-; CHECK-COMMON:     cmp.w r0, [[MUL32]], lsl #2
-define void @mul_with_neg_imm(i32, i32* %b) {
-entry:
-  %1 = trunc i32 %0 to i8
-  %2 = and i8 %1, 1
-  %conv.i = mul nuw i8 %2, -124
-  %tobool = icmp eq i8 %conv.i, 0
-  br i1 %tobool, label %if.end, label %if.then
-
-if.then:
-  store i32 0, i32* %b, align 4
-  br label %if.end
-
-if.end:
-  ret void
-}
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll
@ -1,279 +0,0 @@
-; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -mattr=-use-misched %s -arm-disable-cgp=false -o - | FileCheck %s
-
-; CHECK: overflow_add
-; CHECK: add
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) {
-  %add = add i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_sub
-; CHECK: sub
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) {
-  %add = sub i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_mul
-; CHECK: mul
-; CHECK: uxth
-; CHECK: cmp
-define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) {
-  %add = mul i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_shl
-; CHECK-COMMON: lsl
-; CHECK-COMMON: uxth
-; CHECK-COMMON: cmp
-define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) {
-  %add = shl i16 %a, %b
-  %or = or i16 %add, 1
-  %cmp = icmp ugt i16 %or, 1024
-  %res = select i1 %cmp, i16 2, i16 5
-  ret i16 %res
-}
-
-; CHECK-LABEL: overflow_add_no_consts:
-; CHECK:  add r0, r1
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], r2
-; CHECK:  movhi r0, #8
-define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) {
-  %add = add i8 %a, %b
-  %cmp = icmp ugt i8 %add, %limit
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_add_const_limit:
-; CHECK:  add r0, r1
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], #128
-; CHECK:  movhi r0, #8
-define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
-  %add = add i8 %a, %b
-  %cmp = icmp ugt i8 %add, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_add_positive_const_limit:
-; CHECK:  adds r0, #1
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], #128
-; CHECK:  movhi r0, #8
-define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
-  %add = add i8 %a, 1
-  %cmp = icmp ugt i8 %add, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_add_underflow:
-; CHECK: movs	r1, #16
-; CHECK: cmp	r0, #1
-; CHECK: it	eq
-; CHECK: moveq	r1, #8
-; CHECK: mov	r0, r1
-define i32 @unsafe_add_underflow(i8 zeroext %a) {
-  %add = add i8 %a, -2
-  %cmp = icmp ugt i8 %add, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_add_underflow:
-; CHECK:      subs [[MINUS_1:r[0-9]+]], r0, #1
-; CHECK-NOT:  uxtb
-; CHECK:      cmp [[MINUS_1]], #254
-; CHECK:      movhi r0, #8
-define i32 @safe_add_underflow(i8 zeroext %a) {
-  %add = add i8 %a, -1
-  %cmp = icmp ugt i8 %add, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_add_underflow_neg:
-; CHECK:      subs [[MINUS_1:r[0-9]+]], r0, #2
-; CHECK-NOT:  uxtb
-; CHECK:      cmp [[MINUS_1]], #251
-; CHECK:      movlo r0, #8
-define i32 @safe_add_underflow_neg(i8 zeroext %a) {
-  %add = add i8 %a, -2
-  %cmp = icmp ule i8 %add, -6
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: overflow_sub_negative_const_limit:
-; CHECK:  adds r0, #1
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], #128
-; CHECK:  movhi r0, #8
-define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
-  %sub = sub i8 %a, -1
-  %cmp = icmp ugt i8 %sub, 128
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_sub_underflow:
-; CHECK:  subs r0, #6
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], #250
-; CHECK:  movhi r0, #8
-define i32 @unsafe_sub_underflow(i8 zeroext %a) {
-  %sub = sub i8 %a, 6
-  %cmp = icmp ugt i8 %sub, 250
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_sub_underflow:
-; CHECK:      subs [[MINUS_1:r[0-9]+]], r0, #1
-; CHECK-NOT:  uxtb
-; CHECK:      cmp [[MINUS_1]], #255
-; CHECK:      movlo r0, #8
-define i32 @safe_sub_underflow(i8 zeroext %a) {
-  %sub = sub i8 %a, 1
-  %cmp = icmp ule i8 %sub, 254
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: safe_sub_underflow_neg
-; CHECK:      subs [[MINUS_1:r[0-9]+]], r0, #4
-; CHECK-NOT:  uxtb
-; CHECK:      cmp [[MINUS_1]], #250
-; CHECK:      movhi r0, #8
-define i32 @safe_sub_underflow_neg(i8 zeroext %a) {
-  %sub = sub i8 %a, 4
-  %cmp = icmp uge i8 %sub, -5
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK-LABEL: unsafe_sub_underflow_neg
-; CHECK:  subs r0, #4
-; CHECK:  uxtb [[EXT:r[0-9]+]], r0
-; CHECK:  cmp [[EXT]], #253
-; CHECK:  movlo r0, #8
-define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) {
-  %sub = sub i8 %a, 4
-  %cmp = icmp ult i8 %sub, -3
-  %res = select i1 %cmp, i32 8, i32 16
-  ret i32 %res
-}
-
-; CHECK:      rsb.w [[RSUB:r[0-9]+]], r0, #248
-; CHECK-NOT:  uxt
-; CHECK:      cmp [[RSUB]], #252
-define i32 @safe_sub_imm_var(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %sub = sub nuw nsw i8 -8, %0
-  %cmp = icmp ugt i8 %sub, 252
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_sub_var_imm
-; CHECK:      sub.w [[ADD:r[0-9]+]], r0, #248
-; CHECK-NOT:  uxt
-; CHECK:      cmp [[ADD]], #252
-define i32 @safe_sub_var_imm(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %sub = sub nuw nsw i8 %0, -8
-  %cmp = icmp ugt i8 %sub, 252
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_add_imm_var
-; CHECK:      add.w [[ADD:r[0-9]+]], r0, #129
-; CHECK-NOT:  uxt
-; CHECK:      cmp [[ADD]], #127
-define i32 @safe_add_imm_var(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %add = add nuw nsw i8 -127, %0
-  %cmp = icmp ugt i8 %add, 127
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: safe_add_var_imm
-; CHECK:      add.w [[SUB:r[0-9]+]], r0, #129
-; CHECK-NOT:  uxt
-; CHECK:      cmp [[SUB]], #127
-define i32 @safe_add_var_imm(i8* %b) {
-entry:
-  %0 = load i8, i8* %b, align 1
-  %add = add nuw nsw i8 %0, -127
-  %cmp = icmp ugt i8 %add, 127
-  %conv4 = zext i1 %cmp to i32
-  ret i32 %conv4
-}
-
-; CHECK-LABEL: convert_add_order
-; CHECK: orr{{.*}}, #1
-; CHECK: sub{{.*}}, #40
-; CHECK-NOT: uxt
-define i8 @convert_add_order(i8 zeroext %arg) {
-  %mask.0 = and i8 %arg, 1
-  %mask.1 = and i8 %arg, 2
-  %shl = or i8 %arg, 1
-  %add = add nuw i8 %shl, 10
-  %cmp.0 = icmp ult i8 %add, 60
-  %sub = add nsw i8 %shl, -40
-  %cmp.1 = icmp ult i8 %sub, 20
-  %mask.sel = select i1 %cmp.1, i8 %mask.0, i8 %mask.1
-  %res = select i1 %cmp.0, i8 %mask.sel, i8 %arg
-  ret i8 %res
-}
-
-; CHECK-LABEL: underflow_if_sub
-; CHECK: add{{.}} [[ADD:r[0-9]+]], #245
-; CHECK: cmp [[ADD]], r1
-define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) {
-  %cmp = icmp sgt i32 %arg, 0
-  %conv = zext i1 %cmp to i32
-  %and = and i32 %arg, %conv
-  %trunc = trunc i32 %and to i8
-  %conv1 = add nuw nsw i8 %trunc, -11
-  %cmp.1 = icmp ult i8 %conv1, %arg1
-  %res = select i1 %cmp.1, i8 %conv1, i8 100
-  ret i8 %res
-}
-
-; CHECK-LABEL: underflow_if_sub_signext
-; CHECK:      cmp r0, #0
-; CHECK-NEXT: uxtb  r1, r1
-; CHECK-NOT:  xtb
-define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) {
-  %cmp = icmp sgt i32 %arg, 0
-  %conv = zext i1 %cmp to i32
-  %and = and i32 %arg, %conv
-  %trunc = trunc i32 %and to i8
-  %conv1 = add nuw nsw i8 %trunc, -11
-  %cmp.1 = icmp ugt i8 %arg1, %conv1
-  %res = select i1 %cmp.1, i8 %conv1, i8 100
-  ret i8 %res
-}
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll
@ -1,218 +0,0 @@
-; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON
-
-; Test that ARMCodeGenPrepare can handle:
-; - loops
-; - call operands
-; - call return values
-; - ret instructions
-; We use nuw on the arithmetic instructions to avoid complications.
-
-; Check that the arguments are extended but then nothing else is.
-; This also ensures that the pass can handle loops.
-; CHECK-COMMON-LABEL: phi_feeding_phi_args
-; CHECK-COMMON: uxtb
-; CHECK-COMMON: uxtb
-; CHECK-NOT: uxtb
-define void @phi_feeding_phi_args(i8 %a, i8 %b) {
-entry:
-  %0 = icmp ugt i8 %a, %b
-  br i1 %0, label %preheader, label %empty
-
-empty:
-  br label %preheader
-
-preheader:
-  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
-  br label %loop
-
-loop:
-  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 254
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = sub nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = shl nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp eq i8 %inc2, 255
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; Same as above, but as the args are zeroext, we shouldn't see any uxts.
-; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args
-; CHECK-COMMON-NOT: uxt
-define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
-entry:
-  %0 = icmp ugt i8 %a, %b
-  br i1 %0, label %preheader, label %empty
-
-empty:
-  br label %preheader
-
-preheader:
-  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
-  br label %loop
-
-loop:
-  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 254
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = sub nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = shl nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp eq i8 %inc2, 255
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; Just check that phis also work with i16s.
-; CHECK-COMMON-LABEL: phi_i16:
-; CHECK-COMMON-NOT:   uxt
-define void @phi_i16() {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i16 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i16 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i16 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp ult i16 %inc2, 253
-  br i1 %cmp1, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-; CHECK-COMMON-LABEL: ret_i8
-; CHECK-COMMON-NOT:   uxt
-define i8 @ret_i8() {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i8 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i8 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i8 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
-  %cmp1 = icmp ult i8 %inc2, 253
-  br i1 %cmp1, label %exit, label %loop
-
-exit:
-  ret i8 %inc2
-}
-
-; CHECK-COMMON-LABEL: phi_multiple_undefs
-; CHECK-COMMON-NOT:   uxt
-define i16 @phi_multiple_undefs(i16 zeroext %arg) {
-entry:
-  br label %loop
-
-loop:
-  %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ]
-  %cmp = icmp ult i16 %val, 128
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:
-  %inc = add nuw i16 %val, 2
-  br label %if.end
-
-if.else:
-  %inc1 = add nuw i16 %val, 1
-  br label %if.end
-
-if.end:
-  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
-  %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ]
-  %cmp1 = icmp ult i16 %inc2, 253
-  br i1 %cmp1, label %loop, label %exit
-
-exit:
-  ret i16 %unrelated
-}
-
-; CHECK-COMMON-LABEL: promote_arg_return
-; CHECK-COMMON-NOT: uxt
-; CHECK-COMMON: strb
-define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
-  %add = add nuw i16 %arg1, 15
-  %mul = mul nuw nsw i16 %add, 3
-  %cmp = icmp ult i16 %mul, %arg2
-  %conv = zext i1 %cmp to i8
-  store i8 %conv, i8* %res
-  ret i16 %arg1
-}
-
-; CHECK-COMMON-LABEL: signext_bitcast_phi_select
-; CHECK: uxth [[UXT:r[0-9]+]], r0
-; CHECK: sxth [[SXT:r[0-9]+]], [[UXT]]
-; CHECK: cmp [[SXT]],
-; CHECK-NOT: xth
-define i16 @signext_bitcast_phi_select(i16 signext %start, i16* %in) {
-entry:
-  %const = bitcast i16 -1 to i16
-  br label %for.body
-
-for.body:
-  %idx = phi i16 [ %select, %if.else ], [ %start, %entry ]
-  %cmp.i = icmp sgt i16 %idx, %const
-  br i1 %cmp.i, label %exit, label %if.then
-
-if.then:
-  %idx.next = getelementptr i16, i16* %in, i16 %idx
-  %ld = load i16, i16* %idx.next, align 2
-  %cmp1.i = icmp eq i16 %ld, %idx
-  br i1 %cmp1.i, label %exit, label %if.else
-
-if.else:
-  %lobit = lshr i16 %idx, 15
-  %lobit.not = xor i16 %lobit, 1
-  %select = add nuw i16 %lobit.not, %idx
-  br label %for.body
-
-exit:
-  %res = phi i16 [ %ld, %if.then ], [ 0, %for.body ]
-  ret i16 %res
-}
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll
@ -1,135 +0,0 @@
-; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s
-
-; CHECK-LABEL: phi_pointers
-; CHECK-NOT: uxt
-define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
-entry:
-  %add = add nuw i8 %M, 1
-  %and = and i8 %add, 1
-  %cmp = icmp ugt i8 %add, %N
-  %base = select i1 %cmp, i16* %a, i16* %b
-  %other = select i1 %cmp, i16* %b, i16* %b
-  br label %loop
-
-loop:
-  %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ]
-  %idx = phi i8 [ %and, %entry ], [ %inc, %loop ]
-  %load = load i16, i16* %ptr, align 2
-  %inc = add nuw nsw i8 %idx, 1
-  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
-  %cond = icmp eq i16* %gep, %other
-  br i1 %cond, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-; CHECK-LABEL: phi_pointers_null
-; CHECK-NOT: uxt
-define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
-entry:
-  %add = add nuw i8 %M, 1
-  %and = and i8 %add, 1
-  %cmp = icmp ugt i8 %add, %N
-  %base = select i1 %cmp, i16* %a, i16* %b
-  %other = select i1 %cmp, i16* %b, i16* %b
-  %cmp.1 = icmp eq i16* %base, %other
-  br i1 %cmp.1, label %fail, label %loop
-
-fail:
-  br label %loop
-
-loop:
-  %ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ]
-  %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ]
-  %undef = icmp eq i16* %ptr, undef
-  br i1 %undef, label %exit, label %if.then
-
-if.then:
-  %load = load i16, i16* %ptr, align 2
-  %inc = add nuw nsw i8 %idx, 1
-  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
-  %cond = icmp eq i16* %gep, %other
-  br i1 %cond, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-declare i8 @do_something_with_ptr(i8, i16*)
-
-; CHECK-LABEL: call_pointer
-; CHECK-NOT: uxt
-define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) {
-  %or = or i8 %x, %y
-  %shr = lshr i8 %or, 1
-  %add = add nuw i8 %shr, 2
-  %cmp = icmp ne i8 %add, 0
-  %ptr = select i1 %cmp, i16* %a, i16* %b
-  %call = tail call zeroext i8 @do_something_with_ptr(i8 %shr, i16* %ptr)
-  ret i8 %call
-}
-
-; CHECK-LABEL: pointer_to_pointer
-; CHECK-NOT: uxt
-define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) {
-entry:
-  %addr = load i16*, i16** %arg
-  %val = load i16, i16* %addr
-  %add = add nuw i16 %val, 7
-  %cmp = icmp ult i16 %add, 256
-  %res = select i1 %cmp, i16 128, i16 255
-  ret i16 %res
-}
-
-; CHECK-LABEL: gep_2d_array
-; CHECK-NOT: uxt
-define i8 @gep_2d_array(i8** %a, i8 zeroext %arg) {
-entry:
-  %arrayidx.us = getelementptr inbounds i8*, i8** %a, i32 0
-  %0 = load i8*, i8** %arrayidx.us, align 4
-  %1 = load i8, i8* %0, align 1
-  %sub = sub nuw i8 %1, 1
-  %cmp = icmp ult i8 %sub, %arg
-  %res = select i1 %cmp, i8 27, i8 54
-  ret i8 %res
-}
-
-; CHECK-LABEL: gep_2d_array_loop
-; CHECK-NOT: uxt
-define void @gep_2d_array_loop(i16** nocapture readonly %a, i16** nocapture readonly %b, i32 %N) {
-entry:
-  %cmp30 = icmp eq i32 %N, 0
-  br i1 %cmp30, label %for.cond.cleanup, label %for.cond1.preheader.us
-
-for.cond1.preheader.us:
-  %y.031.us = phi i32 [ %inc13.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ]
-  br label %for.body4.us
-
-for.body4.us:
-  %x.029.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ]
-  %arrayidx.us = getelementptr inbounds i16*, i16** %a, i32 %x.029.us
-  %0 = load i16*, i16** %arrayidx.us, align 4
-  %arrayidx5.us = getelementptr inbounds i16, i16* %0, i32 %y.031.us
-  %1 = load i16, i16* %arrayidx5.us, align 2
-  %dec.us = add nuw i16 %1, -1
-  %cmp6.us = icmp ult i16 %dec.us, 16383
-  %shl.us = shl nuw i16 %dec.us, 2
-  %spec.select.us = select i1 %cmp6.us, i16 %shl.us, i16 %dec.us
-  %arrayidx10.us = getelementptr inbounds i16*, i16** %b, i32 %x.029.us
-  %2 = load i16*, i16** %arrayidx10.us, align 4
-  %arrayidx11.us = getelementptr inbounds i16, i16* %2, i32 %y.031.us
-  store i16 %spec.select.us, i16* %arrayidx11.us, align 2
-  %inc.us = add nuw i32 %x.029.us, 1
-  %exitcond = icmp eq i32 %inc.us, %N
-  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
-
-for.cond1.for.cond.cleanup3_crit_edge.us:
-  %inc13.us = add nuw i32 %y.031.us, 1
-  %exitcond32 = icmp eq i32 %inc13.us, %N
-  br i1 %exitcond32, label %for.cond.cleanup, label %for.cond1.preheader.us
-
-for.cond.cleanup:
-  ret void
-}
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll
@ -1,108 +0,0 @@
-; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
-; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
-
-; CHECK-COMMON-LABEL: eq_sgt
-; CHECK-NODSP: add
-; CHECK-NODSP: uxtb
-; CHECK-NODSP: sxtb
-; CHECK-NODSP: cmp
-; CHECK-NODSP: sub
-; CHECK-NODSP: sxtb
-; CHECK-NODSP: cmp
-
-; CHECK-DSP: uadd8
-; CHECK-DSP: sub
-; CHECK-DSP: cmp
-; CHECK-DSP: sxtb
-; CHECK-DSP: sxtb
-; CHECK-DSP: cmp
-
-; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]],
-; CHECK-DSP-IMM: cmp [[ADD]],
-; CHECK-DSP-IMM: subs [[SUB:r[0-9]+]],
-; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]]
-; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]]
-; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]]
-define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) {
-entry:
-  %load0 = load i8, i8* %x, align 1
-  %load1 = load i8, i8* %y, align 1
-  %add = add i8 %load0, %z
-  %sub = sub i8 %load1, 1
-  %cmp = icmp eq i8 %add, 200
-  %cmp1 = icmp sgt i8 %sub, %add
-  %res0 = select i1 %cmp, i8 35, i8 47
-  %res1 = select i1 %cmp1, i8 %res0, i8 %sub
-  ret i8 %res1
-}
-
-; CHECK-COMMON-LABEL: ugt_slt
-; CHECK-NODSP: sub
-; CHECK-NODSP: sxth
-; CHECK-NODSP: uxth
-; CHECK-NODSP: add
-; CHECK-NODSP: sxth
-; CHECK-NODSP: cmp
-; CHECK-NODSP: cmp
-
-; CHECK-DSP: sub
-; CHECK-DSP: sxth
-; CHECK-DSP: add
-; CHECK-DSP: uxth
-; CHECK-DSP: sxth
-; CHECK-DSP: cmp
-; CHECK-DSP: cmp
-
-; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]],
-; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
-; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2
-; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]]
-; CHECK-DSP-IMM-NOT: uxt
-; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1
-; CHECK-DSP-IMM: usub16 [[SUB:r[0-9]+]], r1, [[ONE]]
-; CHECK-DSP-IMM: cmp [[SUB]], r2
-define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
-entry:
-  %load0 = load i16, i16* %x, align 1
-  %add = add i16 %load0, %z
-  %sub = sub i16 %y, 1
-  %cmp = icmp slt i16 %add, %z
-  %cmp1 = icmp ugt i16 %sub, %z
-  %res0 = select i1 %cmp, i16 35, i16 -1
-  %res1 = select i1 %cmp1, i16 %res0, i16 0
-  ret i16 %res1
-}
-
-; CHECK-COMMON-LABEL: urem_trunc_icmps
-; CHECK-COMMON-NOT: uxt
-; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]],
-; CHECK-COMMON: cmp [[SEXT]], #7
-define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
-entry:
-  %ptr = load i16*, i16** %in, align 4
-  %ld = load i16, i16* %ptr, align 2
-  %cmp.i = icmp eq i16 %ld, 0
-  br i1 %cmp.i, label %exit, label %cond.false.i
-
-cond.false.i:
-  %rem = urem i16 5, %ld
-  %extract.t = trunc i16 %rem to i8
-  br label %body
-
-body:
-  %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
-  %cmp = icmp sgt i8 %cond.in.i.off0, 7
-  %conv5 = zext i1 %cmp to i32
-  store i32 %conv5, i32* %g, align 4
-  %.pr = load i32, i32* %k, align 4
-  %tobool13150 = icmp eq i32 %.pr, 0
-  br i1 %tobool13150, label %for.inc, label %exit
-
-for.inc:
-  %add = add nuw i8 %cond.in.i.off0, 1
-  br label %body
-
-exit:
-  ret void
-}
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-signed.ll
@ -1,89 +0,0 @@
-; RUN: llc -mtriple=thumbv7em -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8m.main -mattr=+dsp -arm-disable-cgp=false %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv7 %s -arm-disable-cgp=false -o - | FileCheck %s
-; RUN: llc -mtriple=armv8 %s -arm-disable-cgp=false -o - | FileCheck %s
-
-; Test to check that ARMCodeGenPrepare doesn't optimised away sign extends.
-; CHECK-LABEL: test_signed_load:
-; CHECK: uxth
-define i16 @test_signed_load(i16* %ptr) {
-  %load = load i16, i16* %ptr
-  %conv0 = zext i16 %load to i32
-  %conv1 = sext i16 %load to i32
-  %cmp = icmp eq i32 %conv0, %conv1
-  %conv2 = zext i1 %cmp to i16
-  ret i16 %conv2
-}
-
-; Don't allow sign bit generating opcodes.
-; CHECK-LABEL: test_ashr:
-; CHECK: sxth
-define i16 @test_ashr(i16 zeroext %arg) {
-  %ashr = ashr i16 %arg, 1
-  %cmp = icmp eq i16 %ashr, 0
-  %conv = zext i1 %cmp to i16
-  ret i16 %conv 
-}
-
-; CHECK-LABEL: test_sdiv:
-; CHECK: sxth
-define i16 @test_sdiv(i16 zeroext %arg) {
-  %sdiv = sdiv i16 %arg, 2
-  %cmp = icmp ne i16 %sdiv, 0
-  %conv = zext i1 %cmp to i16
-  ret i16 %conv 
-}
-
-; CHECK-LABEL: test_srem
-; CHECK: sxth
-define i16 @test_srem(i16 zeroext %arg) {
-  %srem = srem i16 %arg, 4
-  %cmp = icmp ne i16 %srem, 0
-  %conv = zext i1 %cmp to i16
-  ret i16 %conv 
-}
-
-; CHECK-LABEL: test_signext_b
-; CHECK: ldrb [[LDR:r[0-9]+]], [r0]
-; CHECK: uxtab [[UXT:r[0-9]+]], [[LDR]], r1
-; CHECK: cm{{.*}} [[UXT]], #128
-define i32 @test_signext_b(i8* %ptr, i8 signext %arg) {
-entry:
-  %0 = load i8, i8* %ptr, align 1
-  %1 = add nuw nsw i8 %0, %arg
-  %cmp = icmp ult i8 %1, 128
-  %res = select i1 %cmp, i32 42, i32 20894
-  ret i32 %res
-}
-
-; CHECK-LABEL: test_signext_b_ult_slt
-; CHECK: ldrb [[LDR:r[0-9]+]], [r0]
-; CHECK: uxtab [[ADD:r[0-9]+]], [[LDR]], r1
-; CHECK: uxtb [[UXT:r[0-9]+]], r1
-; CHECK: cmp [[ADD]], [[UXT]]
-; CHECK: uxtb [[TRUNC:r[0-9]+]], [[ADD]]
-; CHECK: cmp [[TRUNC]], #127
-define i32 @test_signext_b_ult_slt(i8* %ptr, i8 signext %arg) {
-entry:
-  %0 = load i8, i8* %ptr, align 1
-  %1 = add nuw nsw i8 %0, %arg
-  %cmp = icmp sle i8 %1, 126
-  %cmp.1 = icmp ule i8 %1, %arg
-  %or = and i1 %cmp, %cmp.1
-  %res = select i1 %or, i32 42, i32 57
-  ret i32 %res
-}
-
-; CHECK-LABEL: test_signext_h
-; CHECK: ldrh [[LDR:r[0-9]+]], [r0]
-; CHECK: uxtah [[ADD:r[0-9]+]], [[LDR]], r1
-; CHECK: cm{{.*}} [[ADD]],
-define i32 @test_signext_h(i16* %ptr, i16 signext %arg) {
-entry:
-  %0 = load i16, i16* %ptr, align 1
-  %1 = add nuw nsw i16 %0, %arg
-  %cmp = icmp ult i16 %1, 32768
-  %res = select i1 %cmp, i32 42, i32 20894
-  ret i32 %res
-}
-
--- a/llvm/test/CodeGen/ARM/CGP/arm-cgp-switch.ll
+++ b/llvm/test/CodeGen/ARM/CGP/arm-cgp-switch.ll
@ -1,168 +0,0 @@
-; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s
-
-; CHECK-LABEL: truncate_source_phi_switch
-; CHECK: ldrb
-; CHECK: uxtb
-define void @truncate_source_phi_switch(i8* %memblock, i8* %store, i16 %arg) {
-entry:
-  %pre = load i8, i8* %memblock, align 1
-  %conv = trunc i16 %arg to i8
-  br label %header
-
-header:
-  %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
-  %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
-  %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
-  switch i8 %phi.0, label %default [
-    i8 43, label %for.inc.i
-    i8 45, label %for.inc.i.i
-  ]
-
-for.inc.i:
-  %xor = xor i8 %phi.1, 1
-  br label %latch
-
-for.inc.i.i:
-  %and = and i8 %phi.1, 3
-  br label %latch
-
-default:
-  %sub = sub i8 %phi.0, 1
-  %cmp2 = icmp ugt i8 %sub, 4
-  br i1 %cmp2, label %latch, label %exit
-
-latch:
-  %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
-  %count = add nuw i8 %phi.2, 1
-  store i8 %count, i8* %store, align 1
-  br label %header
-
-exit:
-  ret void
-}
-
-; CHECK-LABEL: icmp_switch_source:
-; CHECK-NOT: uxt
-define i16 @icmp_switch_source(i16 zeroext %arg) {
-entry:
-  %conv = add nuw i16 %arg, 15
-  %mul = mul nuw nsw i16 %conv, 3
-  switch i16 %arg, label %default [
-    i16 0, label %sw.bb
-    i16 1, label %sw.bb.i
-  ]
-
-sw.bb:
-  %cmp0 = icmp ult i16 %mul, 127
-  %select = select i1 %cmp0, i16 %mul, i16 127
-  br label %exit
-
-sw.bb.i:
-  %cmp1 = icmp ugt i16 %mul, 34
-  %select.i = select i1 %cmp1, i16 %mul, i16 34
-  br label %exit
-
-default:
-  br label %exit
-
-exit:
-  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
-  ret i16 %res
-}
-
-; CHECK-LABEL: icmp_switch_narrow_source:
-; CHECK-NOT: uxt
-define i16 @icmp_switch_narrow_source(i8 zeroext %arg) {
-entry:
-  %conv = zext i8 %arg to i16
-  %add = add nuw i16 %conv, 15
-  %mul = mul nuw nsw i16 %add, 3
-  switch i8 %arg, label %default [
-    i8 0, label %sw.bb
-    i8 1, label %sw.bb.i
-  ]
-
-sw.bb:
-  %cmp0 = icmp ult i16 %mul, 127
-  %select = select i1 %cmp0, i16 %mul, i16 127
-  br label %exit
-
-sw.bb.i:
-  %cmp1 = icmp ugt i16 %mul, 34
-  %select.i = select i1 %cmp1, i16 %mul, i16 34
-  br label %exit
-
-default:
-  br label %exit
-
-exit:
-  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
-  ret i16 %res
-}
-
-; CHECK-LABEL: icmp_switch_trunc:
-; CHECK-NOT: uxt
-define i16 @icmp_switch_trunc(i16 zeroext %arg) {
-entry:
-  %conv = add nuw i16 %arg, 15
-  %mul = mul nuw nsw i16 %conv, 3
-  %trunc = trunc i16 %arg to i3
-  switch i3 %trunc, label %default [
-    i3 0, label %sw.bb
-    i3 1, label %sw.bb.i
-  ]
-
-sw.bb:
-  %cmp0 = icmp ult i16 %mul, 127
-  %select = select i1 %cmp0, i16 %mul, i16 127
-  br label %exit
-
-sw.bb.i:
-  %cmp1 = icmp ugt i16 %mul, 34
-  %select.i = select i1 %cmp1, i16 %mul, i16 34
-  br label %exit
-
-default:
-  br label %exit
-
-exit:
-  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
-  ret i16 %res
-}
-
-%class.ae = type { i8 }
-%class.x = type { i8 }
-%class.v = type { %class.q }
-%class.q = type { i16 }
-declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr
-declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr
-
-; CHECK-LABEL: trunc_i16_i9_switch
-; CHECK-NOT: uxt
-define i32 @trunc_i16_i9_switch(%class.ae* %this) {
-entry:
-  %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this)
-  %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call)
-  %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0
-  %1 = load i16, i16* %0, align 2
-  %2 = trunc i16 %1 to i9
-  %trunc = and i9 %2, -64
-  switch i9 %trunc, label %cleanup.fold.split [
-    i9 0, label %cleanup
-    i9 -256, label %if.then7
-  ]
-
-if.then7:
-  %3 = and i16 %1, 7
-  %tobool = icmp eq i16 %3, 0
-  %cond = select i1 %tobool, i32 2, i32 1
-  br label %cleanup
-
-cleanup.fold.split:
-  br label %cleanup
-
-cleanup:
-  %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ]
-  ret i32 %retval.0
-}
--- a/llvm/test/CodeGen/ARM/CGP/clear-structures.ll
+++ b/llvm/test/CodeGen/ARM/CGP/clear-structures.ll
@ -1,75 +0,0 @@
-; RUN: opt -arm-codegenprepare -arm-disable-cgp=false -mtriple=armv8 -verify %s -S -o - | FileCheck %s
-
-; CHECK: clear_structures
-define i32 @clear_structures(i8* nocapture readonly %fmt, [1 x i32] %ap.coerce, i8* %out, void (i32, i8*)* nocapture %write) {
-entry:
-  br label %while.cond.outer
-
-while.cond.outer:
-  %fmt.addr.0.ph = phi i8* [ %fmt, %entry ], [ %fmt.addr.3, %while.cond.outer.backedge ]
-  %0 = load i8, i8* %fmt.addr.0.ph, align 1
-  br label %while.cond
-
-while.cond:
-  switch i8 %0, label %while.cond [
-    i8 0, label %while.end48
-    i8 37, label %while.cond2
-  ]
-
-while.cond2:
-  %flags.0 = phi i32 [ %or, %while.cond2 ], [ 0, %while.cond ]
-  %fmt.addr.0.pn = phi i8* [ %fmt.addr.1, %while.cond2 ], [ %fmt.addr.0.ph, %while.cond ]
-  %fmt.addr.1 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 1
-  %1 = load i8, i8* %fmt.addr.1, align 1
-  ; CHECK: add i8 [[LOAD:%[^ ]+]], -32
-  %sub = add i8 %1, -32
-  %conv6 = zext i8 %sub to i32
-  %shl = shl i32 1, %conv6
-  %and = and i32 %shl, 75785
-  %tobool7 = icmp eq i32 %and, 0
-  %or = or i32 %shl, %flags.0
-  br i1 %tobool7, label %while.cond10.preheader, label %while.cond2
-
-while.cond10.preheader:
-  ; CHECK: [[ADD:%[^ ]+]] = add i8 [[LOAD]], -48
-  ; CHECK: icmp ult i8 [[ADD]], 10
-  %.off = add i8 %1, -48
-  %2 = icmp ult i8 %.off, 10
-  br i1 %2, label %while.cond10, label %while.end18.split
-
-while.cond10:
-  br label %while.cond10
-
-while.end18.split:
-  %cmp20 = icmp eq i8 %1, 46
-  br i1 %cmp20, label %if.then22, label %cond.end
-
-if.then22:
-  %incdec.ptr23 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 2
-  %.pr74 = load i8, i8* %incdec.ptr23, align 1
-  ; CHECK: [[LOAD2:[^ ]+]] = load i8, i8*
-  ; CHECK: [[ZEXT:[^ ]+]] = zext i8 [[LOAD2]] to i32
-  ; CHECK: sub i32 [[ZEXT]], 48
-  %.pr74.off = add i8 %.pr74, -48
-  %3 = icmp ult i8 %.pr74.off, 10
-  br i1 %3, label %while.cond24, label %cond.end
-
-while.cond24:
-  br label %while.cond24
-
-cond.end:
-  %fmt.addr.3 = phi i8* [ %fmt.addr.1, %while.end18.split ], [ %incdec.ptr23, %if.then22 ]
-  %and39 = and i32 %flags.0, 2048
-  %tobool40 = icmp eq i32 %and39, 0
-  br i1 %tobool40, label %while.cond.outer.backedge, label %if.then43
-
-while.cond.outer.backedge:
-  br label %while.cond.outer
-
-if.then43:
-  tail call void %write(i32 43, i8* %out) #1
-  br label %while.cond.outer.backedge
-
-while.end48:
-  ret i32 undef
-}
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@ -40,7 +40,7 @@
 ; CHECK-NEXT:      Function Alias Analysis Results
 ; CHECK-NEXT:      Transform functions to use DSP intrinsics
 ; CHECK-NEXT:      Interleaved Access Pass
-; CHECK-NEXT:      ARM IR optimizations
+; CHECK-NEXT:      IR Type Promotion
 ; CHECK-NEXT:      Dominator Tree Construction
 ; CHECK-NEXT:      Natural Loop Information
 ; CHECK-NEXT:      CodeGen Prepare
--- a/llvm/test/Transforms/TypePromotion/ARM/calls.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/calls.ll
@ -0,0 +1,342 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+define i8 @call_with_imms(i8* %arg) {
+; CHECK-LABEL: @call_with_imms(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull [[ARG:%.*]], i8 zeroext 0, i8 zeroext 0)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[CALL]], 0
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i8 [[CALL]], i8 1
+; CHECK-NEXT:    ret i8 [[RES]]
+; Expected: same instructions as the input; the call result, icmp and select all stay i8.
+  %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0)
+  %cmp = icmp eq i8 %call, 0
+  %res = select i1 %cmp, i8 %call, i8 1
+  ret i8 %res
+}
+
+define i16 @test_call(i8 zeroext %arg) {
+; CHECK-LABEL: @test_call(
+; CHECK-NEXT:    [[CALL:%.*]] = call i8 @dummy_i8(i8 [[ARG:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[CALL]], -128
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+; Expected: the compare stays i8; the input constant 128 appears as -128 in the i8 output.
+  %call = call i8 @dummy_i8(i8 %arg)
+  %cmp = icmp ult i8 %call, 128
+  %conv = zext i1 %cmp to i16
+  ret i16 %conv
+}
+
+define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) {
+; CHECK-LABEL: @promote_i8_sink_i16_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[ARG2:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[CALL:%.*]] = tail call zeroext i8 @dummy_i8(i8 [[ARG0:%.*]])
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[CALL]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP3]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[ADD]], [[TMP2]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    [[RES:%.*]] = tail call zeroext i16 @dummy3(i16 [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[RES]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
+; CHECK-NEXT:    ret i16 [[TMP6]]
+; Expected: add, icmp and select are done in i32; the select is truncated to i16 for the @dummy3 call.
+  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
+  %add = add nuw i8 %call, 1
+  %conv = zext i8 %add to i16
+  %cmp = icmp ne i16 %conv, %arg1
+  %sel = select i1 %cmp, i16 %arg1, i16 %arg2
+  %res = tail call zeroext i16 @dummy3(i16 %sel)
+  ret i16 %res
+}
+
+define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) {
+; CHECK-LABEL: @promote_i8_sink_i16_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[CALL:%.*]] = tail call zeroext i8 @dummy_i8(i8 [[ARG0:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[CALL]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP2]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i8
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP3]] to i16
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[CONV]], i16 [[ARG2:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = tail call zeroext i16 @dummy3(i16 [[SEL]])
+; CHECK-NEXT:    ret i16 [[RES]]
+; Expected: add and icmp are done in i32; the zext to i16 is fed by a trunc back to i8 and the select stays i16.
+  %call = tail call zeroext i8 @dummy_i8(i8 %arg0)
+  %add = add nuw i8 %call, 1
+  %cmp = icmp ne i8 %add, %arg1
+  %conv = zext i8 %arg1 to i16
+  %sel = select i1 %cmp, i16 %conv, i16 %arg2
+  %res = tail call zeroext i16 @dummy3(i16 %sel)
+  ret i16 %res
+}
+
+@uc = global i8 42, align 1
+@LL = global i64 0, align 8
+
+define void @zext_i64() {
+; CHECK-LABEL: @zext_i64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* @uc, align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    store i64 [[CONV]], i64* @LL, align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP0]], 42
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 [[CONV1]])
+; CHECK-NEXT:    ret void
+; Expected: same instructions as the input; the i8 load that feeds both the zext to i64 and the icmp stays i8.
+entry:
+  %0 = load i8, i8* @uc, align 1
+  %conv = zext i8 %0 to i64
+  store i64 %conv, i64* @LL, align 8
+  %cmp = icmp eq i8 %0, 42
+  %conv1 = zext i1 %cmp to i32
+  %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1)
+  ret void
+}
+
+@a = global i16* null, align 4
+@b = global i32 0, align 4
+
+define i32 @constexpr() {
+; CHECK-LABEL: @constexpr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 ptrtoint (i32* @b to i32), i32* @b, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16*, i16** @a, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[TMP1]], ptrtoint (i32* @b to i16)
+; CHECK-NEXT:    store i16 [[OR]], i16* [[TMP0]], align 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i16 [[OR]], 4
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 [[CONV3]])
+; CHECK-NEXT:    ret i32 undef
+; Expected: same instructions as the input; the or with a ptrtoint constant expression and its icmp stay i16.
+entry:
+  store i32 ptrtoint (i32* @b to i32), i32* @b, align 4
+  %0 = load i16*, i16** @a, align 4
+  %1 = load i16, i16* %0, align 2
+  %or = or i16 %1, ptrtoint (i32* @b to i16)
+  store i16 %or, i16* %0, align 2
+  %cmp = icmp ne i16 %or, 4
+  %conv3 = zext i1 %cmp to i32
+  %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2
+  ret i32 undef
+}
+
+define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) {
+; CHECK-LABEL: @call_zext_i8_i32(
+; CHECK-NEXT:  for.cond8.preheader:
+; CHECK-NEXT:    [[CALL217:%.*]] = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef)
+; CHECK-NEXT:    [[TOBOOL219:%.*]] = icmp eq i8 [[CALL217]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL219]], label [[FOR_END411:%.*]], label [[FOR_COND273_PREHEADER:%.*]]
+; CHECK:       for.cond273.preheader:
+; CHECK-NEXT:    [[CALL217_LCSSA:%.*]] = phi i8 [ [[CALL217]], [[FOR_COND8_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[CONV218_LE:%.*]] = zext i8 [[CALL217_LCSSA]] to i32
+; CHECK-NEXT:    [[CALL346:%.*]] = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext [[CALL217_LCSSA]], i32 [[CONV218_LE]])
+; CHECK-NEXT:    unreachable
+; CHECK:       for.end411:
+; CHECK-NEXT:    [[CALL452:%.*]] = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4)
+; CHECK-NEXT:    unreachable
+; Expected: same instructions as the input; the i8 call result, its icmp, the phi and the zext are unchanged.
+for.cond8.preheader:
+  %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef)
+  %tobool219 = icmp eq i8 %call217, 0
+  br i1 %tobool219, label %for.end411, label %for.cond273.preheader
+
+for.cond273.preheader:                            ; preds = %for.cond8.preheader
+  %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ]
+  %conv218.le = zext i8 %call217.lcssa to i32
+  %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le)
+  unreachable
+
+for.end411:                                       ; preds = %for.cond8.preheader
+  %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4)
+  unreachable
+}
+
+%struct.anon = type { i32 }
+
+@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
+@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4
+@g_82 = hidden local_unnamed_addr global i32 0, align 4
+
+define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 {
+; CHECK-LABEL: @call_return_pointer(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[P_13:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i8 [[TMP1]] to i16
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext [[CONV1]], i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[P_13_ADDR_0:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[P_13_ADDR_0_BE:%.*]], [[FOR_COND_BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[P_13_ADDR_0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_COND_BACKEDGE]], label [[IF_THEN:%.*]]
+; CHECK:       for.cond.backedge:
+; CHECK-NEXT:    [[P_13_ADDR_0_BE]] = phi i32 [ [[TMP3:%.*]], [[IF_THEN]] ], [ 0, [[FOR_COND]] ]
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext [[CONV2]])
+; CHECK-NEXT:    [[CONV4:%.*]] = trunc i16 [[CALL3]] to i8
+; CHECK-NEXT:    [[TMP3]] = zext i8 [[CONV4]] to i32
+; CHECK-NEXT:    br label [[FOR_COND_BACKEDGE]]
+; Expected: both phis and the icmp become i32; %p_13 and %conv4 are zext'd, and a trunc feeds the i16 zext.
+entry:
+  %conv1 = zext i8 %p_13 to i16
+  %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef)
+  %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4
+  %conv2 = trunc i32 %0 to i16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %entry
+  %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ]
+  %tobool = icmp eq i8 %p_13.addr.0, 0
+  br i1 %tobool, label %for.cond.backedge, label %if.then
+
+for.cond.backedge:                                ; preds = %for.cond, %if.then
+  %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ]
+  br label %for.cond
+
+if.then:                                          ; preds = %for.cond
+  %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2)
+  %conv4 = trunc i16 %call3 to i8
+  br label %for.cond.backedge
+}
+
+define i32 @check_zext_phi_call_arg() {
+; CHECK-LABEL: @check_zext_phi_call_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[D_SROA_0_0:%.*]] = phi i32 [ 30, [[ENTRY:%.*]] ], [ [[D_SROA_0_0_BE:%.*]], [[FOR_COND_BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[D_SROA_0_0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_COND_BACKEDGE]], label [[IF_THEN:%.*]]
+; CHECK:       for.cond.backedge:
+; CHECK-NEXT:    [[D_SROA_0_0_BE]] = phi i32 [ [[TMP1:%.*]], [[IF_THEN]] ], [ 0, [[FOR_COND]] ]
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[D_SROA_0_0]] to i16
+; CHECK-NEXT:    [[CALL:%.*]] = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 [[D_SROA_0_0]])
+; CHECK-NEXT:    [[TMP1]] = zext i16 [[CALL]] to i32
+; CHECK-NEXT:    br label [[FOR_COND_BACKEDGE]]
+; Expected: phis and icmp become i32, and the phi is passed directly to @f where the input used a zext.
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %entry
+  %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ]
+  %tobool = icmp eq i16 %d.sroa.0.0, 0
+  br i1 %tobool, label %for.cond.backedge, label %if.then
+
+for.cond.backedge:                                ; preds = %for.cond, %if.then
+  %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ]
+  br label %for.cond
+
+if.then:                                          ; preds = %for.cond
+  %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32
+  %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2
+  br label %for.cond.backedge
+}
+
+%struct.atomic_flag = type { i8 }
+
+define zeroext i1 @atomic_flag_test_and_set(%struct.atomic_flag* %object) {
+; CHECK-LABEL: @atomic_flag_test_and_set(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[_VALUE:%.*]] = getelementptr inbounds [[STRUCT_ATOMIC_FLAG:%.*]], %struct.atomic_flag* [[OBJECT:%.*]], i32 0, i32 0
+; CHECK-NEXT:    [[CALL:%.*]] = tail call arm_aapcscc zeroext i8 @__atomic_exchange_1(i8* [[_VALUE]], i8 zeroext 1, i32 5)
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[CALL]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
+; Expected: the call result is zext'd to i32 and the and/icmp are done in i32.
+entry:
+  %_Value = getelementptr inbounds %struct.atomic_flag, %struct.atomic_flag* %object, i32 0, i32 0
+  %call = tail call arm_aapcscc zeroext i8 @__atomic_exchange_1(i8* %_Value, i8 zeroext 1, i32 5) #1
+  %0 = and i8 %call, 1
+  %tobool = icmp ne i8 %0, 0
+  ret i1 %tobool
+}
+
+define i1 @i1_zeroext_call(i16* %ts, i32 %a, i16* %b, i8* %c) {
+; CHECK-LABEL: @i1_zeroext_call(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[TS:%.*]], align 2
+; CHECK-NEXT:    [[CONV_I860:%.*]] = trunc i32 [[A:%.*]] to i16
+; CHECK-NEXT:    store i16 [[CONV_I860]], i16* [[B:%.*]], align 2
+; CHECK-NEXT:    [[CALL_I848:%.*]] = call zeroext i1 @i1_zeroext(i8* [[C:%.*]], i32 64, i16 zeroext [[CONV_I860]])
+; CHECK-NEXT:    br i1 [[CALL_I848]], label [[IF_THEN223:%.*]], label [[IF_ELSE227:%.*]]
+; CHECK:       if.then223:
+; CHECK-NEXT:    [[CMP235:%.*]] = icmp eq i16 [[TMP0]], [[CONV_I860]]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else227:
+; CHECK-NEXT:    [[CMP236:%.*]] = icmp ult i16 [[TMP0]], [[CONV_I860]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i1 [ [[CMP235]], [[IF_THEN223]] ], [ [[CMP236]], [[IF_ELSE227]] ]
+; CHECK-NEXT:    ret i1 [[RETVAL]]
+; Expected: same instructions as the input; the i16 compares in both branches stay i16.
+entry:
+  %0 = load i16, i16* %ts, align 2
+  %conv.i860 = trunc i32 %a to i16
+  store i16 %conv.i860, i16* %b, align 2
+  %call.i848 = call zeroext i1 @i1_zeroext(i8* %c, i32 64, i16 zeroext %conv.i860)
+  br i1 %call.i848, label %if.then223, label %if.else227
+
+if.then223:
+  %cmp235 = icmp eq i16 %0, %conv.i860
+  br label %exit
+
+if.else227:
+  %cmp236 = icmp ult i16 %0, %conv.i860
+  br label %exit
+
+exit:
+  %retval = phi i1 [ %cmp235, %if.then223 ], [ %cmp236, %if.else227 ]
+  ret i1 %retval
+}
+
+define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) {
+; CHECK-LABEL: @promote_arg_pass_to_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[ARG2:%.*]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = add nuw i32 [[TMP1]], 15
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[MUL]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    [[RES:%.*]] = call zeroext i16 @dummy4(i1 [[CMP]], i8 [[TMP4]], i16 [[TMP5]])
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i16 [[RES]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
+; CHECK-NEXT:    ret i16 [[TMP7]]
+; Expected: add, mul and icmp are done in i32; the i8 call operand becomes an and with 255 plus a trunc.
+  %conv = add nuw i16 %arg1, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  %cmp = icmp ult i16 %mul, %arg2
+  %trunc = trunc i16 %arg1 to i8
+  %res = call zeroext i16 @dummy4(i1 %cmp, i8 %trunc, i16 %arg1)
+  ret i16 %res
+}
+
+
+declare i32 @assert(...)
+declare i8 @dummy_i8(i8)
+declare i8 @dummy2(i8*, i8, i8)
+declare i16 @dummy3(i16)
+declare i16 @dummy4(i1, i8, i16)
+
+declare dso_local i32 @e(...) local_unnamed_addr #1
+declare dso_local zeroext i16 @f(...) local_unnamed_addr #1
+declare dso_local arm_aapcscc i8 @__atomic_exchange_1(i8*, i8, i32) local_unnamed_addr
+
+declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66)
+declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2)
+declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64)
+declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32)
+declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext)
+declare i1 @i1_zeroext(i8*, i32, i16 zeroext)
--- a/llvm/test/Transforms/TypePromotion/ARM/casts.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/casts.ll
--- a/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/clear-structures.ll
@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+; Two similar i8 range checks ("add X, -48" followed by "icmp ult ..., 10")
+; appear below. The one on %1 in while.cond10.preheader is expected to stay in
+; i8, while the one on %.pr74 in if.then22 is expected to be promoted to i32,
+; with the add of -48 rewritten as a sub of 48.
+; NOTE(review): judging by the test name, this presumably guards against state
+; from one promotion attempt leaking into the next - confirm.
+define i32 @clear_structures(i8* nocapture readonly %fmt, [1 x i32] %ap.coerce, i8* %out, void (i32, i8*)* nocapture %write) {
+; CHECK-LABEL: @clear_structures(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[WHILE_COND_OUTER:%.*]]
+; CHECK:       while.cond.outer:
+; CHECK-NEXT:    [[FMT_ADDR_0_PH:%.*]] = phi i8* [ [[FMT:%.*]], [[ENTRY:%.*]] ], [ [[FMT_ADDR_3:%.*]], [[WHILE_COND_OUTER_BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[FMT_ADDR_0_PH]], align 1
+; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    switch i8 [[TMP0]], label [[WHILE_COND]] [
+; CHECK-NEXT:    i8 0, label [[WHILE_END48:%.*]]
+; CHECK-NEXT:    i8 37, label [[WHILE_COND2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       while.cond2:
+; CHECK-NEXT:    [[FLAGS_0:%.*]] = phi i32 [ [[OR:%.*]], [[WHILE_COND2]] ], [ 0, [[WHILE_COND]] ]
+; CHECK-NEXT:    [[FMT_ADDR_0_PN:%.*]] = phi i8* [ [[FMT_ADDR_1:%.*]], [[WHILE_COND2]] ], [ [[FMT_ADDR_0_PH]], [[WHILE_COND]] ]
+; CHECK-NEXT:    [[FMT_ADDR_1]] = getelementptr inbounds i8, i8* [[FMT_ADDR_0_PN]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[FMT_ADDR_1]], align 1
+; CHECK-NEXT:    [[SUB:%.*]] = add i8 [[TMP1]], -32
+; CHECK-NEXT:    [[CONV6:%.*]] = zext i8 [[SUB]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[CONV6]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], 75785
+; CHECK-NEXT:    [[TOBOOL7:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    [[OR]] = or i32 [[SHL]], [[FLAGS_0]]
+; CHECK-NEXT:    br i1 [[TOBOOL7]], label [[WHILE_COND10_PREHEADER:%.*]], label [[WHILE_COND2]]
+; CHECK:       while.cond10.preheader:
+; CHECK-NEXT:    [[DOTOFF:%.*]] = add i8 [[TMP1]], -48
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[DOTOFF]], 10
+; CHECK-NEXT:    br i1 [[TMP2]], label [[WHILE_COND10:%.*]], label [[WHILE_END18_SPLIT:%.*]]
+; CHECK:       while.cond10:
+; CHECK-NEXT:    br label [[WHILE_COND10]]
+; CHECK:       while.end18.split:
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i8 [[TMP1]], 46
+; CHECK-NEXT:    br i1 [[CMP20]], label [[IF_THEN22:%.*]], label [[COND_END:%.*]]
+; CHECK:       if.then22:
+; CHECK-NEXT:    [[INCDEC_PTR23:%.*]] = getelementptr inbounds i8, i8* [[FMT_ADDR_0_PN]], i32 2
+; CHECK-NEXT:    [[DOTPR74:%.*]] = load i8, i8* [[INCDEC_PTR23]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[DOTPR74]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP3]], 48
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 10
+; CHECK-NEXT:    br i1 [[TMP5]], label [[WHILE_COND24:%.*]], label [[COND_END]]
+; CHECK:       while.cond24:
+; CHECK-NEXT:    br label [[WHILE_COND24]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[FMT_ADDR_3]] = phi i8* [ [[FMT_ADDR_1]], [[WHILE_END18_SPLIT]] ], [ [[INCDEC_PTR23]], [[IF_THEN22]] ]
+; CHECK-NEXT:    [[AND39:%.*]] = and i32 [[FLAGS_0]], 2048
+; CHECK-NEXT:    [[TOBOOL40:%.*]] = icmp eq i32 [[AND39]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL40]], label [[WHILE_COND_OUTER_BACKEDGE]], label [[IF_THEN43:%.*]]
+; CHECK:       while.cond.outer.backedge:
+; CHECK-NEXT:    br label [[WHILE_COND_OUTER]]
+; CHECK:       if.then43:
+; CHECK-NEXT:    tail call void [[WRITE:%.*]](i32 43, i8* [[OUT:%.*]])
+; CHECK-NEXT:    br label [[WHILE_COND_OUTER_BACKEDGE]]
+; CHECK:       while.end48:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  br label %while.cond.outer
+
+while.cond.outer:
+  %fmt.addr.0.ph = phi i8* [ %fmt, %entry ], [ %fmt.addr.3, %while.cond.outer.backedge ]
+  %0 = load i8, i8* %fmt.addr.0.ph, align 1
+  br label %while.cond
+
+while.cond:
+  switch i8 %0, label %while.cond [
+  i8 0, label %while.end48
+  i8 37, label %while.cond2
+  ]
+
+while.cond2:
+  %flags.0 = phi i32 [ %or, %while.cond2 ], [ 0, %while.cond ]
+  %fmt.addr.0.pn = phi i8* [ %fmt.addr.1, %while.cond2 ], [ %fmt.addr.0.ph, %while.cond ]
+  %fmt.addr.1 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 1
+  %1 = load i8, i8* %fmt.addr.1, align 1
+  %sub = add i8 %1, -32
+  %conv6 = zext i8 %sub to i32
+  %shl = shl i32 1, %conv6
+  %and = and i32 %shl, 75785
+  %tobool7 = icmp eq i32 %and, 0
+  %or = or i32 %shl, %flags.0
+  br i1 %tobool7, label %while.cond10.preheader, label %while.cond2
+
+while.cond10.preheader:
+  ; First range check, on %1: expected to be left in i8.
+  %.off = add i8 %1, -48
+  %2 = icmp ult i8 %.off, 10
+  br i1 %2, label %while.cond10, label %while.end18.split
+
+while.cond10:
+  br label %while.cond10
+
+while.end18.split:
+  %cmp20 = icmp eq i8 %1, 46
+  br i1 %cmp20, label %if.then22, label %cond.end
+
+if.then22:
+  %incdec.ptr23 = getelementptr inbounds i8, i8* %fmt.addr.0.pn, i32 2
+  %.pr74 = load i8, i8* %incdec.ptr23, align 1
+  ; Second range check, on %.pr74: expected to be promoted to i32.
+  %.pr74.off = add i8 %.pr74, -48
+  %3 = icmp ult i8 %.pr74.off, 10
+  br i1 %3, label %while.cond24, label %cond.end
+
+while.cond24:
+  br label %while.cond24
+
+cond.end:
+  %fmt.addr.3 = phi i8* [ %fmt.addr.1, %while.end18.split ], [ %incdec.ptr23, %if.then22 ]
+  %and39 = and i32 %flags.0, 2048
+  %tobool40 = icmp eq i32 %and39, 0
+  br i1 %tobool40, label %while.cond.outer.backedge, label %if.then43
+
+while.cond.outer.backedge:
+  br label %while.cond.outer
+
+if.then43:
+  tail call void %write(i32 43, i8* %out) #1
+  br label %while.cond.outer.backedge
+
+while.end48:
+  ret i32 undef
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll
@ -0,0 +1,349 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+; i8 increment by an immediate (no nuw flag) feeding an unsigned compare
+; against 254, which is printed as -2 in i8. The expected output leaves the
+; add and the icmp in i8, i.e. nothing is promoted.
+define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
+; CHECK-LABEL: @test_ult_254_inc_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, 1
+  %cmp = icmp ult i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Signed variant of the increment-by-immediate test: a signext i8 argument is
+; incremented and compared with slt. The expected output leaves the add and
+; the icmp in i8.
+define i32 @test_slt_254_inc_imm(i8 signext %x) {
+; CHECK-LABEL: @test_slt_254_inc_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, 1
+  %cmp = icmp slt i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; i8 add of two zeroext arguments (no nuw flag) feeding an unsigned compare
+; against 254. The expected output leaves the add and the icmp in i8.
+define i32 @test_ult_254_inc_var(i8 zeroext %x, i8 zeroext %y) {
+; CHECK-LABEL: @test_ult_254_inc_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, %y
+  %cmp = icmp ult i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; i8 add of two arguments feeding a signed (sle) compare. The expected output
+; leaves the add and the icmp in i8.
+define i32 @test_sle_254_inc_var(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_sle_254_inc_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, %y
+  %cmp = icmp sle i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; i8 decrement by an immediate (add of -1) feeding "icmp ugt ..., 1". This one
+; is expected to be promoted: the argument is zero-extended to i32, the add of
+; -1 becomes "sub i32 ..., 1" and the compare is done in i32.
+define i32 @test_ugt_1_dec_imm(i8 zeroext %x) {
+; CHECK-LABEL: @test_ugt_1_dec_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, -1
+  %cmp = icmp ugt i8 %add, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Signed variant of the decrement-by-immediate test (icmp sgt). The expected
+; output leaves the add and the icmp in i8.
+define i32 @test_sgt_1_dec_imm(i8 %x) {
+; CHECK-LABEL: @test_sgt_1_dec_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[ADD]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, -1
+  %cmp = icmp sgt i8 %add, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; i8 subtraction of two zeroext arguments (no nuw flag) feeding
+; "icmp ugt ..., 1". The expected output leaves the sub and the icmp in i8.
+define i32 @test_ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
+; CHECK-LABEL: @test_ugt_1_dec_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SUB]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %sub = sub i8 %x, %y
+  %cmp = icmp ugt i8 %sub, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; i8 subtraction of two arguments feeding a signed (sge) compare. The expected
+; output leaves the sub and the icmp in i8.
+define i32 @test_sge_1_dec_var(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_sge_1_dec_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i8 [[SUB]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %sub = sub i8 %x, %y
+  %cmp = icmp sge i8 %sub, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Chain of i8 xor/and/sub followed by an add of the immediate 1 and an unsigned
+; compare against 254. The expected output leaves the whole chain in i8.
+define i32 @dsp_imm1(i8 zeroext %x, i8 zeroext %y) {
+; CHECK-LABEL: @dsp_imm1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 7
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[AND]], [[XOR]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SUB]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %xor = xor i8 %x, %y
+  %and = and i8 %x, 7
+  %sub = sub i8 %and, %xor
+  %add = add i8 %sub, 1
+  %cmp = icmp ult i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Like @dsp_imm1, but the final add takes a variable operand (a "shl nuw" of
+; %x) instead of an immediate. The expected output leaves the chain in i8.
+define i32 @dsp_var(i8 zeroext %x, i8 zeroext %y) {
+; CHECK-LABEL: @dsp_var(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X]], 7
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[AND]], [[XOR]]
+; CHECK-NEXT:    [[MUL:%.*]] = shl nuw i8 [[X]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SUB]], [[MUL]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %xor = xor i8 %x, %y
+  %and = and i8 %x, 7
+  %sub = sub i8 %and, %xor
+  %mul = shl nuw i8 %x, 1
+  %add = add i8 %sub, %mul
+  %cmp = icmp ult i8 %add, 254
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Two i8 loads feed an unsigned compare, a select between %compare and the
+; inverted first load, a sub and finally an i8 store. The expected output is
+; identical to the input, i.e. nothing is promoted.
+define void @store_dsp_res(i8* %in, i8* %out, i8 %compare) {
+; CHECK-LABEL: @store_dsp_res(
+; CHECK-NEXT:    [[FIRST:%.*]] = getelementptr inbounds i8, i8* [[IN:%.*]], i32 0
+; CHECK-NEXT:    [[SECOND:%.*]] = getelementptr inbounds i8, i8* [[IN]], i32 1
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* [[FIRST]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* [[SECOND]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[LD0]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[COMPARE:%.*]], [[LD1]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i8 [[COMPARE]], i8 [[XOR]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[LD0]], [[SELECT]]
+; CHECK-NEXT:    store i8 [[SUB]], i8* [[OUT:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+  %first = getelementptr inbounds i8, i8* %in, i32 0
+  %second = getelementptr inbounds i8, i8* %in, i32 1
+  %ld0 = load i8, i8* %first
+  %ld1 = load i8, i8* %second
+  %xor = xor i8 %ld0, -1
+  %cmp = icmp ult i8 %compare, %ld1
+  %select = select i1 %cmp, i8 %compare, i8 %xor
+  %sub = sub i8 %ld0, %select
+  store i8 %sub, i8* %out, align 1
+  ret void
+}
+
+; Same IR as @test_ugt_1_dec_imm under a different name: the decrement and the
+; unsigned compare are expected to be promoted to i32, with the add of -1
+; rewritten as "sub i32 ..., 1".
+define i32 @ugt_1_dec_imm(i8 zeroext %x) {
+; CHECK-LABEL: @ugt_1_dec_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %add = add i8 %x, -1
+  %cmp = icmp ugt i8 %add, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; Same IR as @test_ugt_1_dec_var under a different name: the expected output
+; leaves the variable sub and the icmp in i8.
+define i32 @ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) {
+; CHECK-LABEL: @ugt_1_dec_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SUB]], 1
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %sub = sub i8 %x, %y
+  %cmp = icmp ugt i8 %sub, 1
+  %res = select i1 %cmp, i32 35, i32 47
+  ret i32 %res
+}
+
+; A loaded i8 is compared for equality with -1 and is also zero-extended by an
+; existing zext. The expected output is identical to the input: the compare
+; stays in i8.
+define i32 @icmp_eq_minus_one(i8* %ptr) {
+; CHECK-LABEL: @icmp_eq_minus_one(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8* [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[LOAD]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[LOAD]], -1
+; CHECK-NEXT:    [[RET:%.*]] = select i1 [[CMP]], i32 [[CONV]], i32 -1
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %load = load i8, i8* %ptr, align 1
+  %conv = zext i8 %load to i32
+  %cmp = icmp eq i8 %load, -1
+  %ret = select i1 %cmp, i32 %conv, i32 -1
+  ret i32 %ret
+}
+
+; Bitwise not of an i16 (xor with -1) compared for equality with another i16.
+; Expected to be promoted: both arguments are zero-extended to i32 and the
+; all-ones i16 constant becomes 65535 in the i32 xor.
+define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) {
+; CHECK-LABEL: @icmp_not(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[ARG0:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[TMP1]], 65535
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NOT]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 16, i32 32
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %not = xor i16 %arg0, -1
+  %cmp = icmp eq i16 %not, %arg1
+  %res = select i1 %cmp, i32 16, i32 32
+  ret i32 %res
+}
+
+; Compare on i1 values (a loaded i1 is inverted and compared with an i1
+; argument). The expected output leaves everything in i1.
+define i32 @icmp_i1(i1* %arg0, i1 zeroext %arg1, i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp_i1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i1, i1* [[ARG0:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[LOAD]], true
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i1 [[ARG1:%.*]], [[NOT]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 [[A:%.*]], i32 [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %load = load i1, i1* %arg0
+  %not = xor i1 %load, 1
+  %cmp = icmp eq i1 %arg1, %not
+  %res = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %res
+}
+
+; Non-power-of-two width: a loaded i7 is incremented with "add nuw" and
+; compared (ult) with an i7 argument. Expected to be promoted: the argument
+; and the loaded value are zero-extended to i32, and the add (keeping nuw) and
+; the icmp are done in i32.
+define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp_i7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i7 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[LOAD:%.*]] = load i7, i7* [[ARG0:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i7 [[LOAD]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 [[A:%.*]], i32 [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %load = load i7, i7* %arg0
+  %add = add nuw i7 %load, 1
+  %cmp = icmp ult i7 %arg1, %add
+  %res = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %res
+}
+
+; i15 version of @icmp_not: the xor with -1 and the equality compare are
+; expected to be promoted to i32, with the all-ones i15 constant becoming
+; 32767.
+define i32 @icmp_i15(i15 zeroext %arg0, i15 zeroext %arg1) {
+; CHECK-LABEL: @icmp_i15(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i15 [[ARG0:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i15 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], 32767
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[XOR]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 21, i32 42
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %xor = xor i15 %arg0, -1
+  %cmp = icmp eq i15 %xor, %arg1
+  %res = select i1 %cmp, i32 21, i32 42
+  ret i32 %res
+}
+
+; A loaded i8 has -7 added to it and is compared (ugt) with the negative
+; immediate -5. The expected output leaves the add and the icmp in i8.
+define i32 @icmp_minus_imm(i8* %a) {
+; CHECK-LABEL: @icmp_minus_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[A:%.*]], align 1
+; CHECK-NEXT:    [[ADD_I:%.*]] = add i8 [[TMP0]], -7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD_I]], -5
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV1]]
+;
+entry:
+  %0 = load i8, i8* %a, align 1
+  %add.i = add i8 %0, -7
+  %cmp = icmp ugt i8 %add.i, -5
+  %conv1 = zext i1 %cmp to i32
+  ret i32 %conv1
+}
+
+; "mul nuw" of an i8 by the negative immediate -124. Expected to be promoted:
+; the existing trunc is kept and its result zero-extended to i32, and the
+; and/mul/icmp are done in i32 with the multiplier rewritten as 132, which is
+; -124 read as an unsigned 8-bit value.
+define void @mul_with_neg_imm(i32, i32* %b) {
+; CHECK-LABEL: @mul_with_neg_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    [[CONV_I:%.*]] = mul nuw i32 [[TMP3]], 132
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[CONV_I]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store i32 0, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %1 = trunc i32 %0 to i8
+  %2 = and i8 %1, 1
+  %conv.i = mul nuw i8 %2, -124
+  %tobool = icmp eq i8 %conv.i, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  store i32 0, i32* %b, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/lit.local.cfg
+++ b/llvm/test/Transforms/TypePromotion/ARM/lit.local.cfg
@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
--- a/llvm/test/Transforms/TypePromotion/ARM/phis-ret.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/phis-ret.ll
@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+; Check that the arguments are extended but then nothing else is.
+; This also ensures that the pass can handle loops.
+; In the expected output the two i8 arguments are zero-extended to i32 in the
+; entry block, and both phis, the sub/shl and all three compares operate on
+; i32 with no further zext or trunc.
+define void @phi_feeding_phi_args(i8 %a, i8 %b) {
+; CHECK-LABEL: @phi_feeding_phi_args(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PREHEADER:%.*]], label [[EMPTY:%.*]]
+; CHECK:       empty:
+; CHECK-NEXT:    br label [[PREHEADER]]
+; CHECK:       preheader:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP1]], [[EMPTY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ [[TMP3]], [[PREHEADER]] ], [ [[INC2:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[VAL]], 254
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[INC:%.*]] = sub nuw i32 [[VAL]], 2
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[INC1:%.*]] = shl nuw i32 [[VAL]], 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[INC2]], 255
+; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = icmp ugt i8 %a, %b
+  br i1 %0, label %preheader, label %empty
+
+empty:
+  br label %preheader
+
+preheader:
+  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
+  br label %loop
+
+loop:
+  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
+  %cmp = icmp ult i8 %val, 254
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %inc = sub nuw i8 %val, 2
+  br label %if.end
+
+if.else:
+  %inc1 = shl nuw i8 %val, 1
+  br label %if.end
+
+if.end:
+  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
+  %cmp1 = icmp eq i8 %inc2, 255
+  br i1 %cmp1, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Same as above, but with zeroext args. The expected IR is the same as for the
+; plain args: a zext of each argument is still inserted at the IR level.
+; NOTE(review): the original remark that "we shouldn't see any uxts" presumably
+; refers to the final ARM code, where extending a zeroext argument should be
+; free - confirm.
+define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: @phi_feeding_phi_zeroext_args(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PREHEADER:%.*]], label [[EMPTY:%.*]]
+; CHECK:       empty:
+; CHECK-NEXT:    br label [[PREHEADER]]
+; CHECK:       preheader:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP1]], [[EMPTY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ [[TMP3]], [[PREHEADER]] ], [ [[INC2:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[VAL]], 254
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[INC:%.*]] = sub nuw i32 [[VAL]], 2
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[INC1:%.*]] = shl nuw i32 [[VAL]], 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[INC2]], 255
+; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = icmp ugt i8 %a, %b
+  br i1 %0, label %preheader, label %empty
+
+empty:
+  br label %preheader
+
+preheader:
+  %1 = phi i8 [ %a, %entry ], [ %b, %empty ]
+  br label %loop
+
+loop:
+  %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ]
+  %cmp = icmp ult i8 %val, 254
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %inc = sub nuw i8 %val, 2
+  br label %if.end
+
+if.else:
+  %inc1 = shl nuw i8 %val, 1
+  br label %if.end
+
+if.end:
+  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
+  %cmp1 = icmp eq i8 %inc2, 255
+  br i1 %cmp1, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Just check that phis also work with i16s.
+; The loop starts from the constant 0, so in the expected output the phis, the
+; "add nuw" instructions and both compares are simply rewritten in i32 with no
+; zext or trunc inserted.
+define void @phi_i16() {
+; CHECK-LABEL: @phi_i16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[VAL]], 128
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw i32 [[VAL]], 2
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[INC1:%.*]] = add nuw i32 [[VAL]], 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[INC2]], 253
+; CHECK-NEXT:    br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ]
+  %cmp = icmp ult i16 %val, 128
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %inc = add nuw i16 %val, 2
+  br label %if.end
+
+if.else:
+  %inc1 = add nuw i16 %val, 1
+  br label %if.end
+
+if.end:
+  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
+  %cmp1 = icmp ult i16 %inc2, 253
+  br i1 %cmp1, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; i8 loop whose final phi value is returned. In the expected output the loop
+; is promoted to i32 and a trunc back to i8 is inserted in the exit block in
+; front of the ret.
+define i8 @ret_i8() {
+; CHECK-LABEL: @ret_i8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[VAL]], 128
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw i32 [[VAL]], 2
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[INC1:%.*]] = add nuw i32 [[VAL]], 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[INC2]], 253
+; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[INC2]] to i8
+; CHECK-NEXT:    ret i8 [[TMP0]]
+;
+entry:
+  br label %loop
+
+loop:
+  %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ]
+  %cmp = icmp ult i8 %val, 128
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %inc = add nuw i8 %val, 2
+  br label %if.end
+
+if.else:
+  %inc1 = add nuw i8 %val, 1
+  br label %if.end
+
+if.end:
+  %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ]
+  %cmp1 = icmp ult i8 %inc2, 253
+  br i1 %cmp1, label %exit, label %loop
+
+exit:
+  ret i8 %inc2
+}
+
+; Phis with undef incoming values. The loop phi (%val) and its users are
+; expected to be promoted to i32, with its undef operand becoming an i32
+; undef. The %unrelated phi, which does not feed either compare, is expected
+; to stay i16 and is returned unchanged.
+define i16 @phi_multiple_undefs(i16 zeroext %arg) {
+; CHECK-LABEL: @phi_multiple_undefs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[VAL]], 128
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw i32 [[VAL]], 2
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[INC1:%.*]] = add nuw i32 [[VAL]], 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[INC2]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[UNRELATED:%.*]] = phi i16 [ undef, [[IF_THEN]] ], [ [[ARG:%.*]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[INC2]], 253
+; CHECK-NEXT:    br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i16 [[UNRELATED]]
+;
+entry:
+  br label %loop
+
+loop:
+  %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ]
+  %cmp = icmp ult i16 %val, 128
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %inc = add nuw i16 %val, 2
+  br label %if.end
+
+if.else:
+  %inc1 = add nuw i16 %val, 1
+  br label %if.end
+
+if.end:
+  %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ]
+  %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ]
+  %cmp1 = icmp ult i16 %inc2, 253
+  br i1 %cmp1, label %loop, label %exit
+
+exit:
+  ret i16 %unrelated
+}
+
+; An i16 argument is both used in a promoted add/mul/icmp chain and returned
+; directly. In the expected output the arguments are zero-extended to i32 and
+; the returned value is a trunc of the extended %arg1 back to i16.
+define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) {
+; CHECK-LABEL: @promote_arg_return(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[ARG2:%.*]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP1]], 15
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[ADD]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[MUL]], [[TMP2]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    store i8 [[CONV]], i8* [[RES:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %add = add nuw i16 %arg1, 15
+  %mul = mul nuw nsw i16 %add, 3
+  %cmp = icmp ult i16 %mul, %arg2
+  %conv = zext i1 %cmp to i8
+  store i8 %conv, i8* %res
+  ret i16 %arg1
+}
+
+; Loop over an i16 index that starts from a signext argument and is compared
+; with sgt against a bitcast constant. In the expected output the index phi
+; and the unsigned operations are promoted to i32, but the signed compare
+; stays in i16 and is fed by a trunc of the promoted phi. The GEP index
+; becomes the i32 value, the loaded i16 is zero-extended, and the i32 result
+; phi is truncated back to i16 before the ret.
+define i16 @signext_bitcast_phi_select(i16 signext %start, i16* %in) {
+; CHECK-LABEL: @signext_bitcast_phi_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[START:%.*]] to i32
+; CHECK-NEXT:    [[CONST:%.*]] = bitcast i16 -1 to i16
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[SELECT:%.*]], [[IF_ELSE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[IDX]] to i16
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt i16 [[TMP1]], [[CONST]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[EXIT:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[IDX_NEXT:%.*]] = getelementptr i16, i16* [[IN:%.*]], i32 [[IDX]]
+; CHECK-NEXT:    [[LD:%.*]] = load i16, i16* [[IDX_NEXT]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[LD]] to i32
+; CHECK-NEXT:    [[CMP1_I:%.*]] = icmp eq i32 [[TMP2]], [[IDX]]
+; CHECK-NEXT:    br i1 [[CMP1_I]], label [[EXIT]], label [[IF_ELSE]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[LOBIT:%.*]] = lshr i32 [[IDX]], 15
+; CHECK-NEXT:    [[LOBIT_NOT:%.*]] = xor i32 [[LOBIT]], 1
+; CHECK-NEXT:    [[SELECT]] = add nuw i32 [[LOBIT_NOT]], [[IDX]]
+; CHECK-NEXT:    br label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[RES]] to i16
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+entry:
+  %const = bitcast i16 -1 to i16
+  br label %for.body
+
+for.body:
+  %idx = phi i16 [ %select, %if.else ], [ %start, %entry ]
+  %cmp.i = icmp sgt i16 %idx, %const
+  br i1 %cmp.i, label %exit, label %if.then
+
+if.then:
+  %idx.next = getelementptr i16, i16* %in, i16 %idx
+  %ld = load i16, i16* %idx.next, align 2
+  %cmp1.i = icmp eq i16 %ld, %idx
+  br i1 %cmp1.i, label %exit, label %if.else
+
+if.else:
+  %lobit = lshr i16 %idx, 15
+  %lobit.not = xor i16 %lobit, 1
+  %select = add nuw i16 %lobit.not, %idx
+  br label %for.body
+
+exit:
+  %res = phi i16 [ %ld, %if.then ], [ 0, %for.body ]
+  ret i16 %res
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/pointers.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/pointers.ll
@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
+; CHECK-LABEL: @phi_pointers(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[M:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[N:%.*]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP0]], 1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT:    [[BASE:%.*]] = select i1 [[CMP]], i16* [[A:%.*]], i16* [[B:%.*]]
+; CHECK-NEXT:    [[OTHER:%.*]] = select i1 [[CMP]], i16* [[B]], i16* [[B]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR:%.*]] = phi i16* [ [[BASE]], [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[AND]], [[ENTRY]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, i16* [[PTR]], align 2
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IDX]], 1
+; CHECK-NEXT:    [[GEP]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[INC]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i16* [[GEP]], [[OTHER]]
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add = add nuw i8 %M, 1
+  %and = and i8 %add, 1
+  %cmp = icmp ugt i8 %add, %N
+  %base = select i1 %cmp, i16* %a, i16* %b
+  %other = select i1 %cmp, i16* %b, i16* %b
+  br label %loop
+
+loop:
+  %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ]
+  %idx = phi i8 [ %and, %entry ], [ %inc, %loop ]
+  %load = load i16, i16* %ptr, align 2
+  %inc = add nuw nsw i8 %idx, 1
+  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
+  %cond = icmp eq i16* %gep, %other
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) {
+; CHECK-LABEL: @phi_pointers_null(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[M:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[N:%.*]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP0]], 1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ADD]], [[TMP1]]
+; CHECK-NEXT:    [[BASE:%.*]] = select i1 [[CMP]], i16* [[A:%.*]], i16* [[B:%.*]]
+; CHECK-NEXT:    [[OTHER:%.*]] = select i1 [[CMP]], i16* [[B]], i16* [[B]]
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i16* [[BASE]], [[OTHER]]
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[FAIL:%.*]], label [[LOOP:%.*]]
+; CHECK:       fail:
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR:%.*]] = phi i16* [ [[BASE]], [[ENTRY:%.*]] ], [ null, [[FAIL]] ], [ [[GEP:%.*]], [[IF_THEN:%.*]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[AND]], [[ENTRY]] ], [ 0, [[FAIL]] ], [ [[INC:%.*]], [[IF_THEN]] ]
+; CHECK-NEXT:    [[UNDEF:%.*]] = icmp eq i16* [[PTR]], undef
+; CHECK-NEXT:    br i1 [[UNDEF]], label [[EXIT:%.*]], label [[IF_THEN]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, i16* [[PTR]], align 2
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IDX]], 1
+; CHECK-NEXT:    [[GEP]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[INC]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i16* [[GEP]], [[OTHER]]
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add = add nuw i8 %M, 1
+  %and = and i8 %add, 1
+  %cmp = icmp ugt i8 %add, %N
+  %base = select i1 %cmp, i16* %a, i16* %b
+  %other = select i1 %cmp, i16* %b, i16* %b
+  %cmp.1 = icmp eq i16* %base, %other
+  br i1 %cmp.1, label %fail, label %loop
+
+fail:
+  br label %loop
+
+loop:
+  %ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ]
+  %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ]
+  %undef = icmp eq i16* %ptr, undef
+  br i1 %undef, label %exit, label %if.then
+
+if.then:
+  %load = load i16, i16* %ptr, align 2
+  %inc = add nuw nsw i8 %idx, 1
+  %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc
+  %cond = icmp eq i16* %gep, %other
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+declare i8 @do_something_with_ptr(i8, i16*)
+
+define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) {
+; CHECK-LABEL: @call_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[OR]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[SHR]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[ADD]], 0
+; CHECK-NEXT:    [[PTR:%.*]] = select i1 [[CMP]], i16* [[A:%.*]], i16* [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call zeroext i8 @do_something_with_ptr(i8 [[TMP3]], i16* [[PTR]])
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[CALL]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %or = or i8 %x, %y
+  %shr = lshr i8 %or, 1
+  %add = add nuw i8 %shr, 2
+  %cmp = icmp ne i8 %add, 0
+  %ptr = select i1 %cmp, i16* %a, i16* %b
+  %call = tail call zeroext i8 @do_something_with_ptr(i8 %shr, i16* %ptr)
+  ret i8 %call
+}
+
+define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) {
+; CHECK-LABEL: @pointer_to_pointer(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADDR:%.*]] = load i16*, i16** [[ARG:%.*]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i16, i16* [[ADDR]]
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[VAL]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP0]], 7
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 128, i16 255
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %addr = load i16*, i16** %arg
+  %val = load i16, i16* %addr
+  %add = add nuw i16 %val, 7
+  %cmp = icmp ult i16 %add, 256
+  %res = select i1 %cmp, i16 128, i16 255
+  ret i16 %res
+}
+
+define i8 @gep_2d_array(i8** %a, i8 zeroext %arg) {
+; CHECK-LABEL: @gep_2d_array(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i8*, i8** [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX_US]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw i32 [[TMP3]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[SUB]], [[TMP0]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i8 27, i8 54
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+entry:
+  %arrayidx.us = getelementptr inbounds i8*, i8** %a, i32 0
+  %0 = load i8*, i8** %arrayidx.us, align 4
+  %1 = load i8, i8* %0, align 1
+  %sub = sub nuw i8 %1, 1
+  %cmp = icmp ult i8 %sub, %arg
+  %res = select i1 %cmp, i8 27, i8 54
+  ret i8 %res
+}
+
+define void @gep_2d_array_loop(i16** nocapture readonly %a, i16** nocapture readonly %b, i32 %N) {
+; CHECK-LABEL: @gep_2d_array_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP30:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP30]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]]
+; CHECK:       for.cond1.preheader.us:
+; CHECK-NEXT:    [[Y_031_US:%.*]] = phi i32 [ [[INC13_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY4_US:%.*]]
+; CHECK:       for.body4.us:
+; CHECK-NEXT:    [[X_029_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ]
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i16*, i16** [[A:%.*]], i32 [[X_029_US]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16*, i16** [[ARRAYIDX_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5_US:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i32 [[Y_031_US]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX5_US]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[DEC_US:%.*]] = add nuw i32 [[TMP2]], 65535
+; CHECK-NEXT:    [[CMP6_US:%.*]] = icmp ult i32 [[DEC_US]], 16383
+; CHECK-NEXT:    [[SHL_US:%.*]] = shl nuw i32 [[DEC_US]], 2
+; CHECK-NEXT:    [[SPEC_SELECT_US:%.*]] = select i1 [[CMP6_US]], i32 [[SHL_US]], i32 [[DEC_US]]
+; CHECK-NEXT:    [[ARRAYIDX10_US:%.*]] = getelementptr inbounds i16*, i16** [[B:%.*]], i32 [[X_029_US]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16*, i16** [[ARRAYIDX10_US]], align 4
+; CHECK-NEXT:    [[ARRAYIDX11_US:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[Y_031_US]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[SPEC_SELECT_US]] to i16
+; CHECK-NEXT:    store i16 [[TMP4]], i16* [[ARRAYIDX11_US]], align 2
+; CHECK-NEXT:    [[INC_US]] = add nuw i32 [[X_029_US]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC_US]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]]
+; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
+; CHECK-NEXT:    [[INC13_US]] = add nuw i32 [[Y_031_US]], 1
+; CHECK-NEXT:    [[EXITCOND32:%.*]] = icmp eq i32 [[INC13_US]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND32]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp30 = icmp eq i32 %N, 0
+  br i1 %cmp30, label %for.cond.cleanup, label %for.cond1.preheader.us
+
+for.cond1.preheader.us:
+  %y.031.us = phi i32 [ %inc13.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ]
+  br label %for.body4.us
+
+for.body4.us:
+  %x.029.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ]
+  %arrayidx.us = getelementptr inbounds i16*, i16** %a, i32 %x.029.us
+  %0 = load i16*, i16** %arrayidx.us, align 4
+  %arrayidx5.us = getelementptr inbounds i16, i16* %0, i32 %y.031.us
+  %1 = load i16, i16* %arrayidx5.us, align 2
+  %dec.us = add nuw i16 %1, -1
+  %cmp6.us = icmp ult i16 %dec.us, 16383
+  %shl.us = shl nuw i16 %dec.us, 2
+  %spec.select.us = select i1 %cmp6.us, i16 %shl.us, i16 %dec.us
+  %arrayidx10.us = getelementptr inbounds i16*, i16** %b, i32 %x.029.us
+  %2 = load i16*, i16** %arrayidx10.us, align 4
+  %arrayidx11.us = getelementptr inbounds i16, i16* %2, i32 %y.031.us
+  store i16 %spec.select.us, i16* %arrayidx11.us, align 2
+  %inc.us = add nuw i32 %x.029.us, 1
+  %exitcond = icmp eq i32 %inc.us, %N
+  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
+
+for.cond1.for.cond.cleanup3_crit_edge.us:
+  %inc13.us = add nuw i32 %y.031.us, 1
+  %exitcond32 = icmp eq i32 %inc13.us, %N
+  br i1 %exitcond32, label %for.cond.cleanup, label %for.cond1.preheader.us
+
+for.cond.cleanup:
+  ret void
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/signed-icmps.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/signed-icmps.ll
@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) {
+; CHECK-LABEL: @eq_sgt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i8, i8* [[X:%.*]], align 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i8, i8* [[Y:%.*]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD0]], [[Z:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[LOAD1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[ADD]], -56
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[SUB]], [[ADD]]
+; CHECK-NEXT:    [[RES0:%.*]] = select i1 [[CMP]], i8 35, i8 47
+; CHECK-NEXT:    [[RES1:%.*]] = select i1 [[CMP1]], i8 [[RES0]], i8 [[SUB]]
+; CHECK-NEXT:    ret i8 [[RES1]]
+;
+entry:
+  %load0 = load i8, i8* %x, align 1
+  %load1 = load i8, i8* %y, align 1
+  %add = add i8 %load0, %z
+  %sub = sub i8 %load1, 1
+  %cmp = icmp eq i8 %add, 200
+  %cmp1 = icmp sgt i8 %sub, %add
+  %res0 = select i1 %cmp, i8 35, i8 47
+  %res1 = select i1 %cmp1, i8 %res0, i8 %sub
+  ret i8 %res1
+}
+
+define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
+; CHECK-LABEL: @ugt_slt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i16, i16* [[X:%.*]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[LOAD0]], [[Z:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i16 [[Y:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[ADD]], [[Z]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[SUB]], [[Z]]
+; CHECK-NEXT:    [[RES0:%.*]] = select i1 [[CMP]], i16 35, i16 -1
+; CHECK-NEXT:    [[RES1:%.*]] = select i1 [[CMP1]], i16 [[RES0]], i16 0
+; CHECK-NEXT:    ret i16 [[RES1]]
+;
+entry:
+  %load0 = load i16, i16* %x, align 1
+  %add = add i16 %load0, %z
+  %sub = sub i16 %y, 1
+  %cmp = icmp slt i16 %add, %z
+  %cmp1 = icmp ugt i16 %sub, %z
+  %res0 = select i1 %cmp, i16 35, i16 -1
+  %res1 = select i1 %cmp1, i16 %res0, i16 0
+  ret i16 %res1
+}
+
+define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
+; CHECK-LABEL: @urem_trunc_icmps(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = load i16*, i16** [[IN:%.*]], align 4
+; CHECK-NEXT:    [[LD:%.*]] = load i16, i16* [[PTR]], align 2
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i16 [[LD]], 0
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[EXIT:%.*]], label [[COND_FALSE_I:%.*]]
+; CHECK:       cond.false.i:
+; CHECK-NEXT:    [[REM:%.*]] = urem i16 5, [[LD]]
+; CHECK-NEXT:    [[EXTRACT_T:%.*]] = trunc i16 [[REM]] to i8
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[COND_IN_I_OFF0:%.*]] = phi i8 [ [[EXTRACT_T]], [[COND_FALSE_I]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[COND_IN_I_OFF0]], 7
+; CHECK-NEXT:    [[CONV5:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    store i32 [[CONV5]], i32* [[G:%.*]], align 4
+; CHECK-NEXT:    [[DOTPR:%.*]] = load i32, i32* [[K:%.*]], align 4
+; CHECK-NEXT:    [[TOBOOL13150:%.*]] = icmp eq i32 [[DOTPR]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL13150]], label [[FOR_INC]], label [[EXIT]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[ADD]] = add nuw i8 [[COND_IN_I_OFF0]], 1
+; CHECK-NEXT:    br label [[BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr = load i16*, i16** %in, align 4
+  %ld = load i16, i16* %ptr, align 2
+  %cmp.i = icmp eq i16 %ld, 0
+  br i1 %cmp.i, label %exit, label %cond.false.i
+
+cond.false.i:
+  %rem = urem i16 5, %ld
+  %extract.t = trunc i16 %rem to i8
+  br label %body
+
+body:
+  %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
+  %cmp = icmp sgt i8 %cond.in.i.off0, 7
+  %conv5 = zext i1 %cmp to i32
+  store i32 %conv5, i32* %g, align 4
+  %.pr = load i32, i32* %k, align 4
+  %tobool13150 = icmp eq i32 %.pr, 0
+  br i1 %tobool13150, label %for.inc, label %exit
+
+for.inc:
+  %add = add nuw i8 %cond.in.i.off0, 1
+  br label %body
+
+exit:
+  ret void
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/signed.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/signed.ll
@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+; Test to check that the type promotion pass (formerly ARMCodeGenPrepare) doesn't optimise away sign extends.
+define i16 @test_signed_load(i16* %ptr) {
+; CHECK-LABEL: @test_signed_load(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, i16* [[PTR:%.*]]
+; CHECK-NEXT:    [[CONV0:%.*]] = zext i16 [[LOAD]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[LOAD]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CONV0]], [[CONV1]]
+; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    ret i16 [[CONV2]]
+;
+  %load = load i16, i16* %ptr
+  %conv0 = zext i16 %load to i32
+  %conv1 = sext i16 %load to i32
+  %cmp = icmp eq i32 %conv0, %conv1
+  %conv2 = zext i1 %cmp to i16
+  ret i16 %conv2
+}
+
+; Don't allow sign bit generating opcodes.
+define i16 @test_ashr(i16 zeroext %arg) {
+; CHECK-LABEL: @test_ashr(
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i16 [[ARG:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[ASHR]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+  %ashr = ashr i16 %arg, 1
+  %cmp = icmp eq i16 %ashr, 0
+  %conv = zext i1 %cmp to i16
+  ret i16 %conv
+}
+
+define i16 @test_sdiv(i16 zeroext %arg) {
+; CHECK-LABEL: @test_sdiv(
+; CHECK-NEXT:    [[SDIV:%.*]] = sdiv i16 [[ARG:%.*]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i16 [[SDIV]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+  %sdiv = sdiv i16 %arg, 2
+  %cmp = icmp ne i16 %sdiv, 0
+  %conv = zext i1 %cmp to i16
+  ret i16 %conv
+}
+
+define i16 @test_srem(i16 zeroext %arg) {
+; CHECK-LABEL: @test_srem(
+; CHECK-NEXT:    [[SREM:%.*]] = srem i16 [[ARG:%.*]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i16 [[SREM]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    ret i16 [[CONV]]
+;
+  %srem = srem i16 %arg, 4
+  %cmp = icmp ne i16 %srem, 0
+  %conv = zext i1 %cmp to i16
+  ret i16 %conv
+}
+
+define i32 @test_signext_b(i8* %ptr, i8 signext %arg) {
+; CHECK-LABEL: @test_signext_b(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP3]], 128
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 42, i32 20894
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %1 = add nuw nsw i8 %0, %arg
+  %cmp = icmp ult i8 %1, 128
+  %res = select i1 %cmp, i32 42, i32 20894
+  ret i32 %res
+}
+
+define i32 @test_signext_b_ult_slt(i8* %ptr, i8 signext %arg) {
+; CHECK-LABEL: @test_signext_b_ult_slt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i8 [[TMP4]], 126
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ule i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT:    [[OR:%.*]] = and i1 [[CMP]], [[CMP_1]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[OR]], i32 42, i32 57
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %0 = load i8, i8* %ptr, align 1
+  %1 = add nuw nsw i8 %0, %arg
+  %cmp = icmp sle i8 %1, 126
+  %cmp.1 = icmp ule i8 %1, %arg
+  %or = and i1 %cmp, %cmp.1
+  %res = select i1 %or, i32 42, i32 57
+  ret i32 %res
+}
+
+define i32 @test_signext_h(i16* %ptr, i16 signext %arg) {
+; CHECK-LABEL: @test_signext_h(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP3]], 32768
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 42, i32 20894
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %0 = load i16, i16* %ptr, align 1
+  %1 = add nuw nsw i16 %0, %arg
+  %cmp = icmp ult i16 %1, 32768
+  %res = select i1 %cmp, i32 42, i32 20894
+  ret i32 %res
+}
+
--- a/llvm/test/Transforms/TypePromotion/ARM/switch.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/switch.ll
@ -0,0 +1,291 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+define void @truncate_source_phi_switch(i8* %memblock, i8* %store, i16 %arg) {
+; CHECK-LABEL: @truncate_source_phi_switch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PRE:%.*]] = load i8, i8* [[MEMBLOCK:%.*]], align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[PRE]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i16 [[ARG:%.*]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[CONV]] to i32
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[PHI_0:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[COUNT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[PHI_1:%.*]] = phi i32 [ [[TMP1]], [[ENTRY]] ], [ [[PHI_3:%.*]], [[LATCH]] ]
+; CHECK-NEXT:    [[PHI_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[COUNT]], [[LATCH]] ]
+; CHECK-NEXT:    switch i32 [[PHI_0]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 43, label [[FOR_INC_I:%.*]]
+; CHECK-NEXT:    i32 45, label [[FOR_INC_I_I:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       for.inc.i:
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[PHI_1]], 1
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       for.inc.i.i:
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[PHI_1]], 3
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       default:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[PHI_0]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[SUB]], 4
+; CHECK-NEXT:    br i1 [[CMP2]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[PHI_3]] = phi i32 [ [[XOR]], [[FOR_INC_I]] ], [ [[AND]], [[FOR_INC_I_I]] ], [ [[PHI_2]], [[DEFAULT]] ]
+; CHECK-NEXT:    [[COUNT]] = add nuw i32 [[PHI_2]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[COUNT]] to i8
+; CHECK-NEXT:    store i8 [[TMP2]], i8* [[STORE:%.*]], align 1
+; CHECK-NEXT:    br label [[HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %pre = load i8, i8* %memblock, align 1
+  %conv = trunc i16 %arg to i8
+  br label %header
+
+header:
+  %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ]
+  %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ]
+  %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ]
+  switch i8 %phi.0, label %default [
+  i8 43, label %for.inc.i
+  i8 45, label %for.inc.i.i
+  ]
+
+for.inc.i:
+  %xor = xor i8 %phi.1, 1
+  br label %latch
+
+for.inc.i.i:
+  %and = and i8 %phi.1, 3
+  br label %latch
+
+default:
+  %sub = sub i8 %phi.0, 1
+  %cmp2 = icmp ugt i8 %sub, 4
+  br i1 %cmp2, label %latch, label %exit
+
+latch:
+  %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ]
+  %count = add nuw i8 %phi.2, 1
+  store i8 %count, i8* %store, align 1
+  br label %header
+
+exit:
+  ret void
+}
+
+define i16 @icmp_switch_source(i16 zeroext %arg) {
+; CHECK-LABEL: @icmp_switch_source(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = add nuw i32 [[TMP0]], 15
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], 3
+; CHECK-NEXT:    switch i32 [[TMP0]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB_I:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp ult i32 [[MUL]], 127
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP0]], i32 [[MUL]], i32 127
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       sw.bb.i:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[MUL]], 34
+; CHECK-NEXT:    [[SELECT_I:%.*]] = select i1 [[CMP1]], i32 [[MUL]], i32 34
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       default:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[SELECT]], [[SW_BB]] ], [ [[SELECT_I]], [[SW_BB_I]] ], [ [[MUL]], [[DEFAULT]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[RES]] to i16
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+entry:
+  %conv = add nuw i16 %arg, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  switch i16 %arg, label %default [
+  i16 0, label %sw.bb
+  i16 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+define i16 @icmp_switch_narrow_source(i8 zeroext %arg) {
+; CHECK-LABEL: @icmp_switch_narrow_source(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP0]], 15
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[ADD]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    switch i8 [[TMP1]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i8 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i8 1, label [[SW_BB_I:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp ult i32 [[MUL]], 127
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP0]], i32 [[MUL]], i32 127
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       sw.bb.i:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[MUL]], 34
+; CHECK-NEXT:    [[SELECT_I:%.*]] = select i1 [[CMP1]], i32 [[MUL]], i32 34
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       default:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[SELECT]], [[SW_BB]] ], [ [[SELECT_I]], [[SW_BB_I]] ], [ [[MUL]], [[DEFAULT]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[RES]] to i16
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+entry:
+  %conv = zext i8 %arg to i16
+  %add = add nuw i16 %conv, 15
+  %mul = mul nuw nsw i16 %add, 3
+  switch i8 %arg, label %default [
+  i8 0, label %sw.bb
+  i8 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+define i16 @icmp_switch_trunc(i16 zeroext %arg) {
+; CHECK-LABEL: @icmp_switch_trunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[CONV:%.*]] = add nuw i32 [[TMP0]], 15
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i3
+; CHECK-NEXT:    switch i3 [[TMP2]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i3 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i3 1, label [[SW_BB_I:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp ult i32 [[MUL]], 127
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP0]], i32 [[MUL]], i32 127
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       sw.bb.i:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[MUL]], 34
+; CHECK-NEXT:    [[SELECT_I:%.*]] = select i1 [[CMP1]], i32 [[MUL]], i32 34
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       default:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[SELECT]], [[SW_BB]] ], [ [[SELECT_I]], [[SW_BB_I]] ], [ [[MUL]], [[DEFAULT]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[RES]] to i16
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+entry:
+  %conv = add nuw i16 %arg, 15
+  %mul = mul nuw nsw i16 %conv, 3
+  %trunc = trunc i16 %arg to i3
+  switch i3 %trunc, label %default [
+  i3 0, label %sw.bb
+  i3 1, label %sw.bb.i
+  ]
+
+sw.bb:
+  %cmp0 = icmp ult i16 %mul, 127
+  %select = select i1 %cmp0, i16 %mul, i16 127
+  br label %exit
+
+sw.bb.i:
+  %cmp1 = icmp ugt i16 %mul, 34
+  %select.i = select i1 %cmp1, i16 %mul, i16 34
+  br label %exit
+
+default:
+  br label %exit
+
+exit:
+  %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ]
+  ret i16 %res
+}
+
+%class.ae = type { i8 }
+%class.x = type { i8 }
+%class.v = type { %class.q }
+%class.q = type { i16 }
+declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr
+declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr
+
+define i32 @trunc_i16_i9_switch(%class.ae* %this) {
+; CHECK-LABEL: @trunc_i16_i9_switch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call %class.x* @_ZNK2ae2afEv(%class.ae* [[THIS:%.*]])
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call %class.v* @_ZN1x2acEv(%class.x* [[CALL]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_V:%.*]], %class.v* [[CALL2]], i32 0, i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 511
+; CHECK-NEXT:    [[TRUNC:%.*]] = and i32 [[TMP3]], 448
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i32 [[TRUNC]] to i9
+; CHECK-NEXT:    switch i9 [[TMP4]], label [[CLEANUP_FOLD_SPLIT:%.*]] [
+; CHECK-NEXT:    i9 0, label [[CLEANUP:%.*]]
+; CHECK-NEXT:    i9 -256, label [[IF_THEN7:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then7:
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP2]], 7
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 2, i32 1
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup.fold.split:
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[COND]], [[IF_THEN7]] ], [ 0, [[ENTRY:%.*]] ], [ 2, [[CLEANUP_FOLD_SPLIT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this)
+  %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call)
+  %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0
+  %1 = load i16, i16* %0, align 2
+  %2 = trunc i16 %1 to i9
+  %trunc = and i9 %2, -64
+  switch i9 %trunc, label %cleanup.fold.split [
+  i9 0, label %cleanup
+  i9 -256, label %if.then7
+  ]
+
+if.then7:
+  %3 = and i16 %1, 7
+  %tobool = icmp eq i16 %3, 0
+  %cond = select i1 %tobool, i32 2, i32 1
+  br label %cleanup
+
+cleanup.fold.split:
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ]
+  ret i32 %retval.0
+}
--- a/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll
+++ b/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll
@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm -type-promotion -verify -disable-type-promotion=false -S %s -o - | FileCheck %s
+
+; An i16 add of two zeroext arguments feeds an 'or' and an unsigned compare
+; against 1024. The expected output is identical to the input: every operation
+; stays in i16 and no zext/trunc is inserted (presumably because the add may
+; wrap in i16, which a wider add would not reproduce).
+define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: @overflow_add(
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[OR]], 1024
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 2, i16 5
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %add = add i16 %a, %b
+  %or = or i16 %add, 1
+  %cmp = icmp ugt i16 %or, 1024
+  %res = select i1 %cmp, i16 2, i16 5
+  ret i16 %res
+}
+
+; Same shape as @overflow_add but with an i16 sub (the result value is still
+; named %add). The expected output keeps the whole sequence in i16, unchanged
+; from the input.
+define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: @overflow_sub(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[OR]], 1024
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 2, i16 5
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %add = sub i16 %a, %b
+  %or = or i16 %add, 1
+  %cmp = icmp ugt i16 %or, 1024
+  %res = select i1 %cmp, i16 2, i16 5
+  ret i16 %res
+}
+
+; Same shape as @overflow_add but with an i16 mul (the result value is still
+; named %add). The expected output keeps the whole sequence in i16, unchanged
+; from the input.
+define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: @overflow_mul(
+; CHECK-NEXT:    [[ADD:%.*]] = mul i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[OR]], 1024
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 2, i16 5
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %add = mul i16 %a, %b
+  %or = or i16 %add, 1
+  %cmp = icmp ugt i16 %or, 1024
+  %res = select i1 %cmp, i16 2, i16 5
+  ret i16 %res
+}
+
+; Same shape as @overflow_add but with an i16 shl by a variable amount (the
+; result value is still named %add). The expected output keeps the whole
+; sequence in i16, unchanged from the input.
+define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: @overflow_shl(
+; CHECK-NEXT:    [[ADD:%.*]] = shl i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[ADD]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[OR]], 1024
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i16 2, i16 5
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+  %add = shl i16 %a, %b
+  %or = or i16 %add, 1
+  %cmp = icmp ugt i16 %or, 1024
+  %res = select i1 %cmp, i16 2, i16 5
+  ret i16 %res
+}
+
+; An i8 add of two arguments compared (ugt) against a third argument; no
+; constants are involved. The expected output leaves the add and compare in i8.
+define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) {
+; CHECK-LABEL: @overflow_add_no_consts(
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD]], [[LIMIT:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, %b
+  %cmp = icmp ugt i8 %add, %limit
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 add of two arguments compared (ugt) against the constant 128. The
+; expected output leaves the add and compare in i8; the limit appears as -128
+; there, which is the same 8-bit pattern (0x80) as 128.
+define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: @overflow_add_const_limit(
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, %b
+  %cmp = icmp ugt i8 %add, 128
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 add of the positive constant 1 compared (ugt) against 128. The expected
+; output leaves the add and compare in i8 (128 is printed as -128, the same
+; 8-bit pattern).
+define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
+; CHECK-LABEL: @overflow_add_positive_const_limit(
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, 1
+  %cmp = icmp ugt i8 %add, 128
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 add of -2 compared (ugt) against 254. The expected output leaves the
+; add and compare in i8 (254 is printed as -2, the same 8-bit pattern).
+; Contrast with @safe_add_underflow, which adds -1 against the same limit and
+; is expected to be promoted.
+define i32 @unsafe_add_underflow(i8 zeroext %a) {
+; CHECK-LABEL: @unsafe_add_underflow(
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[A:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD]], -2
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, -2
+  %cmp = icmp ugt i8 %add, 254
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 add of -1 compared (ugt) against 254. The expected output promotes the
+; sequence to i32: the argument is zero-extended, the add of -1 is rewritten as
+; 'sub i32 ..., 1', and the compare is done in i32 against 254.
+define i32 @safe_add_underflow(i8 zeroext %a) {
+; CHECK-LABEL: @safe_add_underflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[TMP2]], 254
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, -1
+  %cmp = icmp ugt i8 %add, 254
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 add of -2 compared (ule) against -6. The expected output promotes the
+; sequence to i32: the add becomes 'sub i32 ..., 2' and the limit -6 becomes
+; its zero-extended value 250.
+define i32 @safe_add_underflow_neg(i8 zeroext %a) {
+; CHECK-LABEL: @safe_add_underflow_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[TMP2]], 250
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %add = add i8 %a, -2
+  %cmp = icmp ule i8 %add, -6
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 sub of the negative constant -1 compared (ugt) against 128. The
+; expected output leaves the sub and compare in i8 (128 is printed as -128,
+; the same 8-bit pattern).
+define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
+; CHECK-LABEL: @overflow_sub_negative_const_limit(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A:%.*]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SUB]], -128
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i8 %a, -1
+  %cmp = icmp ugt i8 %sub, 128
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 sub of 6 compared (ugt) against 250. The expected output leaves the
+; sub and compare in i8 (250 is printed as -6, the same 8-bit pattern).
+define i32 @unsafe_sub_underflow(i8 zeroext %a) {
+; CHECK-LABEL: @unsafe_sub_underflow(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A:%.*]], 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SUB]], -6
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i8 %a, 6
+  %cmp = icmp ugt i8 %sub, 250
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 sub of 1 compared (ule) against 254. The expected output promotes the
+; sequence to i32: the argument is zero-extended and both the sub and the
+; compare are performed in i32 with the same constants.
+define i32 @safe_sub_underflow(i8 zeroext %a) {
+; CHECK-LABEL: @safe_sub_underflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[SUB]], 254
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i8 %a, 1
+  %cmp = icmp ule i8 %sub, 254
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 sub of 4 compared (uge) against -5. The expected output promotes the
+; sequence to i32, with the limit -5 replaced by its zero-extended value 251.
+define i32 @safe_sub_underflow_neg(i8 zeroext %a) {
+; CHECK-LABEL: @safe_sub_underflow_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[SUB]], 251
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i8 %a, 4
+  %cmp = icmp uge i8 %sub, -5
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; An i8 sub of 4 compared (ult) against -3. The expected output leaves the sub
+; and compare in i8, unchanged from the input. Contrast with
+; @safe_sub_underflow_neg, which uses the same sub with 'uge -5' and is
+; expected to be promoted.
+define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) {
+; CHECK-LABEL: @unsafe_sub_underflow_neg(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A:%.*]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[SUB]], -3
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %sub = sub i8 %a, 4
+  %cmp = icmp ult i8 %sub, -3
+  %res = select i1 %cmp, i32 8, i32 16
+  ret i32 %res
+}
+
+; 'sub nuw nsw' with the immediate (-8) as the left operand and a loaded i8 as
+; the right operand, compared (ugt) against 252. The expected output promotes
+; to i32: the loaded value is zero-extended, the immediate becomes 248, and
+; the nuw/nsw flags are kept on the i32 sub.
+define i32 @safe_sub_imm_var(i8* %b) {
+; CHECK-LABEL: @safe_sub_imm_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[B:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 248, [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SUB]], 252
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV4]]
+;
+entry:
+  %0 = load i8, i8* %b, align 1
+  %sub = sub nuw nsw i8 -8, %0
+  %cmp = icmp ugt i8 %sub, 252
+  %conv4 = zext i1 %cmp to i32
+  ret i32 %conv4
+}
+
+; Mirror of @safe_sub_imm_var with the operands swapped: the loaded i8 is the
+; left operand and the immediate (-8) the right. The expected output promotes
+; to i32 with the immediate as 248 and the nuw/nsw flags kept.
+define i32 @safe_sub_var_imm(i8* %b) {
+; CHECK-LABEL: @safe_sub_var_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[B:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 [[TMP1]], 248
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SUB]], 252
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV4]]
+;
+entry:
+  %0 = load i8, i8* %b, align 1
+  %sub = sub nuw nsw i8 %0, -8
+  %cmp = icmp ugt i8 %sub, 252
+  %conv4 = zext i1 %cmp to i32
+  ret i32 %conv4
+}
+
+; 'add nuw nsw' with the immediate (-127) as the left operand and a loaded i8
+; as the right operand, compared (ugt) against 127. The expected output
+; promotes to i32: the loaded value is zero-extended, the immediate becomes
+; 129, and the nuw/nsw flags are kept on the i32 add.
+define i32 @safe_add_imm_var(i8* %b) {
+; CHECK-LABEL: @safe_add_imm_var(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[B:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 129, [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ADD]], 127
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV4]]
+;
+entry:
+  %0 = load i8, i8* %b, align 1
+  %add = add nuw nsw i8 -127, %0
+  %cmp = icmp ugt i8 %add, 127
+  %conv4 = zext i1 %cmp to i32
+  ret i32 %conv4
+}
+
+; Mirror of @safe_add_imm_var with the operands swapped: the loaded i8 is the
+; left operand and the immediate (-127) the right. The expected output promotes
+; to i32 with the immediate as 129 and the nuw/nsw flags kept.
+define i32 @safe_add_var_imm(i8* %b) {
+; CHECK-LABEL: @safe_add_var_imm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[B:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[TMP1]], 129
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ADD]], 127
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    ret i32 [[CONV4]]
+;
+entry:
+  %0 = load i8, i8* %b, align 1
+  %add = add nuw nsw i8 %0, -127
+  %cmp = icmp ugt i8 %add, 127
+  %conv4 = zext i1 %cmp to i32
+  ret i32 %conv4
+}
+
+; Two compares share the operand %shl: one through 'add nuw ..., 10' and one
+; through 'add nsw ..., -40'. Their results drive two selects over i8 values.
+; The expected output promotes the whole function body to i32 with a single
+; zext of the argument and a single trunc before the return. The add of 10
+; stays an add, while the add of -40 is rewritten as 'sub nsw i32 ..., 40'.
+define i8 @convert_add_order(i8 zeroext %arg) {
+; CHECK-LABEL: @convert_add_order(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[ARG:%.*]] to i32
+; CHECK-NEXT:    [[MASK_0:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT:    [[MASK_1:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT:    [[SHL:%.*]] = or i32 [[TMP1]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[SHL]], 10
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[ADD]], 60
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 [[SHL]], 40
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[TMP2]], 20
+; CHECK-NEXT:    [[MASK_SEL:%.*]] = select i1 [[CMP_1]], i32 [[MASK_0]], i32 [[MASK_1]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP_0]], i32 [[MASK_SEL]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[RES]] to i8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %mask.0 = and i8 %arg, 1
+  %mask.1 = and i8 %arg, 2
+  %shl = or i8 %arg, 1
+  %add = add nuw i8 %shl, 10
+  %cmp.0 = icmp ult i8 %add, 60
+  %sub = add nsw i8 %shl, -40
+  %cmp.1 = icmp ult i8 %sub, 20
+  %mask.sel = select i1 %cmp.1, i8 %mask.0, i8 %mask.1
+  %res = select i1 %cmp.0, i8 %mask.sel, i8 %arg
+  ret i8 %res
+}
+
+; An i32 value is truncated to i8, has -11 added (nuw nsw) and is compared
+; (ult) against the zeroext i8 argument %arg1; the select result is returned
+; as i8. In the expected output the original trunc is kept and its result is
+; zero-extended again, the add becomes 'add nuw nsw i32 ..., 245', the compare
+; and select are done in i32, and the result is truncated before the return.
+define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) {
+; CHECK-LABEL: @underflow_if_sub(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG]], [[CONV]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[AND]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TRUNC]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = add nuw nsw i32 [[TMP2]], 245
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[CONV1]], [[TMP1]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP_1]], i32 [[CONV1]], i32 100
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[RES]] to i8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %cmp = icmp sgt i32 %arg, 0
+  %conv = zext i1 %cmp to i32
+  %and = and i32 %arg, %conv
+  %trunc = trunc i32 %and to i8
+  %conv1 = add nuw nsw i8 %trunc, -11
+  %cmp.1 = icmp ult i8 %conv1, %arg1
+  %res = select i1 %cmp.1, i8 %conv1, i8 100
+  ret i8 %res
+}
+
+; Variant of @underflow_if_sub where %arg1 is declared signext and the compare
+; is written as 'ugt %arg1, %conv1' (operands swapped). The expected output
+; shows the same i32 promotion, with %arg1 explicitly zero-extended at function
+; entry despite its signext attribute.
+define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) {
+; CHECK-LABEL: @underflow_if_sub_signext(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[ARG1:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG]], [[CONV]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[AND]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TRUNC]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = add nuw nsw i32 [[TMP2]], 245
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i32 [[TMP1]], [[CONV1]]
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP_1]], i32 [[CONV1]], i32 100
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[RES]] to i8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %cmp = icmp sgt i32 %arg, 0
+  %conv = zext i1 %cmp to i32
+  %and = and i32 %arg, %conv
+  %trunc = trunc i32 %and to i8
+  %conv1 = add nuw nsw i8 %trunc, -11
+  %cmp.1 = icmp ugt i8 %arg1, %conv1
+  %res = select i1 %cmp.1, i8 %conv1, i8 100
+  ret i8 %res
+}
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@ -562,6 +562,7 @@ int main(int argc, char **argv) {
  initializeWasmEHPreparePass(Registry);
  initializeWriteBitcodePassPass(Registry);
  initializeHardwareLoopsPass(Registry);
+  initializeTypePromotionPass(Registry);

 #ifdef LINK_POLLY_INTO_TOOLS
  polly::initializePollyPasses(Registry);