[Hexagon] Remove support for V4

llvm-svn: 344791
2018-10-19 17:31:11 +00:00 · 2018-10-19 17:31:11 +00:00 · 6bfc6577f2
parent ce3f1915f3
commit 6bfc6577f2
54 changed files with 1822 additions and 2036 deletions
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@ -73,4 +73,3 @@ add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(MCTargetDesc)
 add_subdirectory(TargetInfo)
-
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@ -323,31 +323,27 @@ class Proc<string Name, SchedMachineModel Model,
 : ProcessorModel<Name, Model, Features>;

 def : Proc<"generic", HexagonModelV60,
-           [ArchV4, ArchV5, ArchV55, ArchV60,
+           [ArchV5, ArchV55, ArchV60,
            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
            FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv4",  HexagonModelV4,
-           [ArchV4,
-            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
-            FeaturePackets, FeatureSmallData]>;
-def : Proc<"hexagonv5",  HexagonModelV4,
-           [ArchV4, ArchV5,
+def : Proc<"hexagonv5",  HexagonModelV5,
+           [ArchV5,
            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
            FeaturePackets, FeatureSmallData]>;
 def : Proc<"hexagonv55", HexagonModelV55,
-           [ArchV4, ArchV5, ArchV55,
+           [ArchV5, ArchV55,
            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
            FeaturePackets, FeatureSmallData]>;
 def : Proc<"hexagonv60", HexagonModelV60,
-           [ArchV4, ArchV5, ArchV55, ArchV60,
+           [ArchV5, ArchV55, ArchV60,
            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
            FeaturePackets, FeatureSmallData]>;
 def : Proc<"hexagonv62", HexagonModelV62,
-           [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
+           [ArchV5, ArchV55, ArchV60, ArchV62,
            FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
            FeaturePackets, FeatureSmallData]>;
 def : Proc<"hexagonv65", HexagonModelV65,
-           [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
            FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
            FeatureNVS, FeaturePackets, FeatureSmallData]>;

--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@ -555,8 +555,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
    if ((!IsI1LowReg && !IsI2LowReg) || !isEvenReg(FirstRegIndex))
      continue;

-    // Check that the two instructions are combinable. V4 allows more
-    // instructions to be merged into a combine.
+    // Check that the two instructions are combinable.
    // The order matters because in a A2_tfrsi we might can encode a int8 as
    // the hi reg operand but only a uint6 as the low reg operand.
    if ((IsI2LowReg && !areCombinableOperations(TRI, I1, *I2, AllowC64)) ||
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@ -15,7 +15,7 @@
 #define HEXAGON_DEP_ARCH_H
 namespace llvm {
 namespace Hexagon {
-enum class ArchEnum { V4,V5,V55,V60,V62,V65 };
+enum class ArchEnum { NoArch,Generic,V5,V55,V60,V62,V65 };
 } // namespace Hexagon
 } // namespace llvm;
 #endif // HEXAGON_DEP_ARCH_H
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td
@ -18,7 +18,4 @@ def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V
 def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
 def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
 def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
-def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
 def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
-def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@ -991,7 +991,7 @@ def A2_roundsat : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = round($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_c2f7d806, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000110;
 let hasNewValue = 1;
@ -3314,7 +3314,7 @@ def A5_vaddhubs : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_3op>, Enc_d2216a {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000001010;
@ -4059,7 +4059,7 @@ def F2_conv_d2df : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_d2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000011;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4069,7 +4069,7 @@ def F2_conv_d2sf : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_d2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000010;
 let hasNewValue = 1;
@ -4081,7 +4081,7 @@ def F2_conv_df2d : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_df2d($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4091,7 +4091,7 @@ def F2_conv_df2d_chop : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_df2d($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000110;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4101,7 +4101,7 @@ def F2_conv_df2sf : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_df2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000000;
 let hasNewValue = 1;
@ -4113,7 +4113,7 @@ def F2_conv_df2ud : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_df2ud($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4123,7 +4123,7 @@ def F2_conv_df2ud_chop : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_df2ud($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000111;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4133,7 +4133,7 @@ def F2_conv_df2uw : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_df2uw($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000011;
 let hasNewValue = 1;
@ -4145,7 +4145,7 @@ def F2_conv_df2uw_chop : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_df2uw($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000101;
 let hasNewValue = 1;
@ -4157,7 +4157,7 @@ def F2_conv_df2w : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_df2w($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000100;
 let hasNewValue = 1;
@ -4169,7 +4169,7 @@ def F2_conv_df2w_chop : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_df2w($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000111;
 let hasNewValue = 1;
@ -4181,7 +4181,7 @@ def F2_conv_sf2d : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_sf2d($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000100;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4191,7 +4191,7 @@ def F2_conv_sf2d_chop : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_sf2d($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000110;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4201,7 +4201,7 @@ def F2_conv_sf2df : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_sf2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4211,7 +4211,7 @@ def F2_conv_sf2ud : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_sf2ud($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000011;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4221,7 +4221,7 @@ def F2_conv_sf2ud_chop : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000101;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4231,7 +4231,7 @@ def F2_conv_sf2uw : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_sf2uw($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10001011011;
 let hasNewValue = 1;
@ -4243,7 +4243,7 @@ def F2_conv_sf2uw_chop : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_sf2uw($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001011011;
 let hasNewValue = 1;
@ -4255,7 +4255,7 @@ def F2_conv_sf2w : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_sf2w($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10001011100;
 let hasNewValue = 1;
@ -4267,7 +4267,7 @@ def F2_conv_sf2w_chop : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_sf2w($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001011100;
 let hasNewValue = 1;
@ -4279,7 +4279,7 @@ def F2_conv_ud2df : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32),
 "$Rdd32 = convert_ud2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb {
 let Inst{13-5} = 0b000000010;
 let Inst{31-21} = 0b10000000111;
 let isFP = 1;
@ -4289,7 +4289,7 @@ def F2_conv_ud2sf : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = convert_ud2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10001000001;
 let hasNewValue = 1;
@ -4301,7 +4301,7 @@ def F2_conv_uw2df : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_uw2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000001;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4311,7 +4311,7 @@ def F2_conv_uw2sf : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_uw2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10001011001;
 let hasNewValue = 1;
@ -4323,7 +4323,7 @@ def F2_conv_w2df : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins IntRegs:$Rs32),
 "$Rdd32 = convert_w2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62 {
 let Inst{13-5} = 0b000000010;
 let Inst{31-21} = 0b10000100100;
 let isFP = 1;
@ -4333,7 +4333,7 @@ def F2_conv_w2sf : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = convert_w2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10001011010;
 let hasNewValue = 1;
@ -4345,7 +4345,7 @@ def F2_dfclass : HInst<
 (outs PredRegs:$Pd4),
 (ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
 "$Pd4 = dfclass($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
+tc_7a830544, TypeALU64>, Enc_1f19b5 {
 let Inst{4-2} = 0b100;
 let Inst{13-10} = 0b0000;
 let Inst{31-21} = 0b11011100100;
@ -4356,7 +4356,7 @@ def F2_dfcmpeq : HInst<
 (outs PredRegs:$Pd4),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
 let Inst{7-2} = 0b000000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11010010111;
@ -4368,7 +4368,7 @@ def F2_dfcmpge : HInst<
 (outs PredRegs:$Pd4),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
 let Inst{7-2} = 0b010000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11010010111;
@ -4380,7 +4380,7 @@ def F2_dfcmpgt : HInst<
 (outs PredRegs:$Pd4),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
 let Inst{7-2} = 0b001000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11010010111;
@ -4392,7 +4392,7 @@ def F2_dfcmpuo : HInst<
 (outs PredRegs:$Pd4),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7 {
 let Inst{7-2} = 0b011000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11010010111;
@ -4404,7 +4404,7 @@ def F2_dfimm_n : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins u10_0Imm:$Ii),
 "$Rdd32 = dfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_234a11a5, TypeALU64>, Enc_e6c957 {
 let Inst{20-16} = 0b00000;
 let Inst{31-22} = 0b1101100101;
 let prefersSlot3 = 1;
@ -4413,7 +4413,7 @@ def F2_dfimm_p : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins u10_0Imm:$Ii),
 "$Rdd32 = dfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
+tc_234a11a5, TypeALU64>, Enc_e6c957 {
 let Inst{20-16} = 0b00000;
 let Inst{31-22} = 0b1101100100;
 let prefersSlot3 = 1;
@ -4422,7 +4422,7 @@ def F2_sfadd : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sfadd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011000;
@ -4436,7 +4436,7 @@ def F2_sfclass : HInst<
 (outs PredRegs:$Pd4),
 (ins IntRegs:$Rs32, u5_0Imm:$Ii),
 "$Pd4 = sfclass($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
+tc_7a830544, TypeS_2op>, Enc_83ee64 {
 let Inst{7-2} = 0b000000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b10000101111;
@ -4447,7 +4447,7 @@ def F2_sfcmpeq : HInst<
 (outs PredRegs:$Pd4),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e {
 let Inst{7-2} = 0b011000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000111111;
@ -4459,7 +4459,7 @@ def F2_sfcmpge : HInst<
 (outs PredRegs:$Pd4),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e {
 let Inst{7-2} = 0b000000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000111111;
@ -4471,7 +4471,7 @@ def F2_sfcmpgt : HInst<
 (outs PredRegs:$Pd4),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e {
 let Inst{7-2} = 0b100000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000111111;
@ -4483,7 +4483,7 @@ def F2_sfcmpuo : HInst<
 (outs PredRegs:$Pd4),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e {
 let Inst{7-2} = 0b001000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000111111;
@ -4495,7 +4495,7 @@ def F2_sffixupd : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011110;
@ -4507,7 +4507,7 @@ def F2_sffixupn : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011110;
@ -4519,7 +4519,7 @@ def F2_sffixupr : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32),
 "$Rd32 = sffixupr($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823 {
 let Inst{13-5} = 0b000000000;
 let Inst{31-21} = 0b10001011101;
 let hasNewValue = 1;
@ -4530,7 +4530,7 @@ def F2_sffma : HInst<
 (outs IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_d580173f, TypeM>, Enc_2ae154 {
 let Inst{7-5} = 0b100;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101111000;
@ -4544,7 +4544,7 @@ def F2_sffma_lib : HInst<
 (outs IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_d580173f, TypeM>, Enc_2ae154 {
 let Inst{7-5} = 0b110;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101111000;
@ -4558,7 +4558,7 @@ def F2_sffma_sc : HInst<
 (outs IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
 "$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
+tc_038a1342, TypeM>, Enc_437f33 {
 let Inst{7-7} = 0b1;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101111011;
@ -4572,7 +4572,7 @@ def F2_sffms : HInst<
 (outs IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_d580173f, TypeM>, Enc_2ae154 {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101111000;
@ -4586,7 +4586,7 @@ def F2_sffms_lib : HInst<
 (outs IntRegs:$Rx32),
 (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
+tc_d580173f, TypeM>, Enc_2ae154 {
 let Inst{7-5} = 0b111;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101111000;
@ -4600,7 +4600,7 @@ def F2_sfimm_n : HInst<
 (outs IntRegs:$Rd32),
 (ins u10_0Imm:$Ii),
 "$Rd32 = sfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_234a11a5, TypeALU64>, Enc_6c9440 {
 let Inst{20-16} = 0b00000;
 let Inst{31-22} = 0b1101011001;
 let hasNewValue = 1;
@ -4611,7 +4611,7 @@ def F2_sfimm_p : HInst<
 (outs IntRegs:$Rd32),
 (ins u10_0Imm:$Ii),
 "$Rd32 = sfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
+tc_234a11a5, TypeALU64>, Enc_6c9440 {
 let Inst{20-16} = 0b00000;
 let Inst{31-22} = 0b1101011000;
 let hasNewValue = 1;
@ -4622,7 +4622,7 @@ def F2_sfinvsqrta : HInst<
 (outs IntRegs:$Rd32, PredRegs:$Pe4),
 (ins IntRegs:$Rs32),
 "$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
+tc_4d99bca9, TypeS_2op>, Enc_890909 {
 let Inst{13-7} = 0b0000000;
 let Inst{31-21} = 0b10001011111;
 let hasNewValue = 1;
@ -4634,7 +4634,7 @@ def F2_sfmax : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sfmax($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_976ddc4f, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011100;
@ -4648,7 +4648,7 @@ def F2_sfmin : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sfmin($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_976ddc4f, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011100;
@ -4662,7 +4662,7 @@ def F2_sfmpy : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011010;
@ -4676,7 +4676,7 @@ def F2_sfrecipa : HInst<
 (outs IntRegs:$Rd32, PredRegs:$Pe4),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
+tc_9c00ce8d, TypeM>, Enc_a94f3b {
 let Inst{7-7} = 0b1;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011111;
@ -4689,7 +4689,7 @@ def F2_sfsub : HInst<
 (outs IntRegs:$Rd32),
 (ins IntRegs:$Rs32, IntRegs:$Rt32),
 "$Rd32 = sfsub($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101011000;
@ -16981,7 +16981,7 @@ def M4_cmpyi_whc : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, IntRegs:$Rt32),
 "$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
 let Inst{7-5} = 0b101;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000101000;
@ -17007,7 +17007,7 @@ def M4_cmpyr_whc : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, IntRegs:$Rt32),
 "$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
+tc_8fd5f294, TypeS_3op>, Enc_3d5b28 {
 let Inst{7-5} = 0b111;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11000101000;
@ -17360,7 +17360,7 @@ def M5_vdmacbsu : HInst<
 (outs DoubleRegs:$Rxx32),
 (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
+tc_e913dc32, TypeM>, Enc_88c16c {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101010001;
@ -17372,7 +17372,7 @@ def M5_vdmpybsu : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
 "$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
+tc_8fd5f294, TypeM>, Enc_a56825 {
 let Inst{7-5} = 0b001;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b11101000101;
@ -18207,7 +18207,7 @@ def S2_asr_i_p_rnd : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
 "$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op>, Enc_5eac98 {
 let Inst{7-5} = 0b111;
 let Inst{31-21} = 0b10000000110;
 let prefersSlot3 = 1;
@ -18216,7 +18216,7 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
 "$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op> {
 let isPseudo = 1;
 }
 def S2_asr_i_r : HInst<
@ -25151,7 +25151,7 @@ def S5_asrhub_rnd_sat : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
 "$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op>, Enc_11a146 {
 let Inst{7-5} = 0b100;
 let Inst{13-12} = 0b00;
 let Inst{31-21} = 0b10001000011;
@ -25164,7 +25164,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
 "$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op> {
 let hasNewValue = 1;
 let opNewValue = 0;
 let isPseudo = 1;
@ -25173,7 +25173,7 @@ def S5_asrhub_sat : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
 "$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op>, Enc_11a146 {
 let Inst{7-5} = 0b101;
 let Inst{13-12} = 0b00;
 let Inst{31-21} = 0b10001000011;
@ -25186,7 +25186,7 @@ def S5_popcountp : HInst<
 (outs IntRegs:$Rd32),
 (ins DoubleRegs:$Rss32),
 "$Rd32 = popcount($Rss32)",
-tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
+tc_00afc57e, TypeS_2op>, Enc_90cd8b {
 let Inst{13-5} = 0b000000011;
 let Inst{31-21} = 0b10001000011;
 let hasNewValue = 1;
@ -25197,7 +25197,7 @@ def S5_vasrhrnd : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
 "$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op>, Enc_12b6e9 {
 let Inst{7-5} = 0b000;
 let Inst{13-12} = 0b00;
 let Inst{31-21} = 0b10000000001;
@ -25207,7 +25207,7 @@ def S5_vasrhrnd_goodsyntax : HInst<
 (outs DoubleRegs:$Rdd32),
 (ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
 "$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
+tc_2b6f77c6, TypeS_2op> {
 let isPseudo = 1;
 }
 def S6_allocframe_to_raw : HInst<
@ -37007,7 +37007,7 @@ def Y5_l2fetch : HInst<
 (outs),
 (ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
 "l2fetch($Rs32,$Rtt32)",
-tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
+tc_daa058fa, TypeST>, Enc_e6abcf {
 let Inst{7-0} = 0b00000000;
 let Inst{13-13} = 0b0;
 let Inst{31-21} = 0b10100110100;
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@ -1228,7 +1228,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
-  bool IsV4 = !Subtarget.hasV5Ops();
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(4);
@ -1270,10 +1269,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

-  if (Subtarget.hasV5Ops()) {
-    addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
-    addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
-  }
+  addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+  addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);

  //
  // Handling of scalar operations.
@ -1351,8 +1348,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
  setOperationAction(ISD::CTTZ, MVT::i16, Promote);

-  // In V5, popcount can count # of 1s in i64 but returns i32.
-  // On V4 it will be expanded (set later).
+  // Popcount can count # of 1s in i64 but returns i32.
  setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
  setOperationAction(ISD::CTPOP, MVT::i16, Promote);
  setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@ -1515,57 +1511,28 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i64, Custom);
  }
-  if (Subtarget.hasV5Ops()) {
-    setOperationAction(ISD::FMA,  MVT::f64, Expand);
-    setOperationAction(ISD::FADD, MVT::f64, Expand);
-    setOperationAction(ISD::FSUB, MVT::f64, Expand);
-    setOperationAction(ISD::FMUL, MVT::f64, Expand);

-    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+  // V5+.
+  setOperationAction(ISD::FMA,  MVT::f64, Expand);
+  setOperationAction(ISD::FADD, MVT::f64, Expand);
+  setOperationAction(ISD::FSUB, MVT::f64, Expand);
+  setOperationAction(ISD::FMUL, MVT::f64, Expand);

-    setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
-    setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
-    setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
-    setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
-    setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
-    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
-    setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
-    setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
-    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
-  } else { // V4
-    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
-    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
-    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
-    setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
-    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Expand);
-    setOperationAction(ISD::FP_ROUND,   MVT::f64, Expand);
-    setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

-    setOperationAction(ISD::CTPOP, MVT::i8,  Expand);
-    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
-    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
-    // Expand these operations for both f32 and f64:
-    for (unsigned FPExpOpV4 :
-         {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
-      setOperationAction(FPExpOpV4, MVT::f32, Expand);
-      setOperationAction(FPExpOpV4, MVT::f64, Expand);
-    }
-
-    for (ISD::CondCode FPExpCCV4 :
-         {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
-          ISD::SETUO,  ISD::SETO}) {
-      setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
-      setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
-    }
-  }
+  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
+  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Handling of indexed loads/stores: default is "expand".
  //
@ -1601,42 +1568,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");

-  if (IsV4) {
-    // Handle single-precision floating point operations on V4.
-    if (FastMath) {
-      setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
-      setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
-      setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
-      setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
-      setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
-      // Double-precision compares.
-      setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
-      setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
-    } else {
-      setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
-      setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
-      setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
-      setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
-      setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
-      // Double-precision compares.
-      setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
-      setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
-    }
-  }
-
  // This is the only fast library function for sqrtd.
  if (FastMath)
    setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");

  // Prefix is: nothing  for "slow-math",
-  //            "fast2_" for V4 fast-math and V5+ fast-math double-precision
+  //            "fast2_" for V5+ fast-math double-precision
  // (actually, keep fast-math and fast-math2 separate for now)
  if (FastMath) {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
    setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
    setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
    setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
-    // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
    setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
  } else {
    setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
@ -1646,44 +1589,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
    setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
  }

-  if (Subtarget.hasV5Ops()) {
-    if (FastMath)
-      setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
-    else
-      setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
-  } else {
-    // V4
-    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
-    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
-    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
-    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
-    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
-    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
-    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
-    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
-    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
-    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
-    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
-    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
-    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
-    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
-    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
-    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
-    setLibcallName(RTLIB::FPEXT_F32_F64,    "__hexagon_extendsfdf2");
-    setLibcallName(RTLIB::FPROUND_F64_F32,  "__hexagon_truncdfsf2");
-    setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
-    setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
-    setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
-    setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
-    setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
-    setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
-    setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
-    setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
-    setLibcallName(RTLIB::UO_F32,  "__hexagon_unordsf2");
-    setLibcallName(RTLIB::UO_F64,  "__hexagon_unorddf2");
-    setLibcallName(RTLIB::O_F32,   "__hexagon_unordsf2");
-    setLibcallName(RTLIB::O_F64,   "__hexagon_unorddf2");
-  }
+  if (FastMath)
+    setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
+  else
+    setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");

  // These cause problems when the shift amount is non-constant.
  setLibcallName(RTLIB::SHL_I128, nullptr);
@ -3007,7 +2916,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  return Subtarget.hasV5Ops();
+  return true;
 }

 /// isLegalAddressingMode - Return true if the addressing mode represented by
--- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@ -194,8 +194,6 @@ class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
 //                         Instruction Classes Definitions +
 //===----------------------------------------------------------------------===//

-// LD Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
 let mayLoad = 1 in
 class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
             string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
@ -205,9 +203,6 @@ class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
             string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;

-// ST Instruction Class in V2/V3 can take SLOT0 only.
-// ST Instruction Class in V4    can take SLOT0 & SLOT1.
-// Definition of the instruction class CHANGED from V2/V3 to V4.
 let mayStore = 1 in
 class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
             string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
@ -235,15 +230,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
 //                         Instruction Classes Definitions -
 //===----------------------------------------------------------------------===//

-//===----------------------------------------------------------------------===//
-// V4 Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormatsV4.td"
-
-//===----------------------------------------------------------------------===//
-// V60+ Instruction Format Definitions +
-//===----------------------------------------------------------------------===//
-
+include "HexagonInstrFormatsV5.td"
 include "HexagonInstrFormatsV60.td"
 include "HexagonInstrFormatsV65.td"
--- a/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@ -1,4 +1,4 @@
-//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==//
+//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file describes the Hexagon V4 instruction classes in TableGen format.
+// This file describes the Hexagon V5 instruction classes in TableGen format.
 //
 //===----------------------------------------------------------------------===//

--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@ -1398,7 +1398,5 @@ def: T_R_pat<Y2_dczeroa,     int_hexagon_Y2_dczeroa>;
 def: T_RR_pat<Y4_l2fetch,    int_hexagon_Y4_l2fetch>;
 def: T_RP_pat<Y5_l2fetch,    int_hexagon_Y5_l2fetch>;

-include "HexagonIntrinsicsV3.td"
-include "HexagonIntrinsicsV4.td"
 include "HexagonIntrinsicsV5.td"
 include "HexagonIntrinsicsV60.td"
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@ -1,27 +0,0 @@
-//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector reduce complex multiply real or imaginary
-def : T_PR_pat <M2_vrcmpys_s1,     int_hexagon_M2_vrcmpys_s1>;
-def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
-def : T_PR_pat <M2_vrcmpys_s1rp,   int_hexagon_M2_vrcmpys_s1rp>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
-
-def: T_RP_pat<A2_addsp,   int_hexagon_A2_addsp>;
-def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
-def: T_PP_pat<A2_minp,    int_hexagon_A2_minp>;
-def: T_PP_pat<A2_minup,   int_hexagon_A2_minup>;
-def: T_PP_pat<A2_maxp,    int_hexagon_A2_maxp>;
-def: T_PP_pat<A2_maxup,   int_hexagon_A2_maxup>;
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@ -1,305 +0,0 @@
-//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is populated based on the following specs:
-// Hexagon V4 Architecture Extensions
-// Application-Level Specification
-// 80-V9418-12 Rev. A
-// June 15, 2010
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
-def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
-def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
-
-//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
-def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
-def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
-def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
-// Rxx[^]=vpmpyh(Rs,Rt)
-def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
-// Rxx^=pmpyw(Rs,Rt)
-def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
-
-//Rxx^=asr(Rss,Rt)
-def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
-//Rxx^=asl(Rss,Rt)
-def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
-//Rxx^=lsr(Rss,Rt)
-def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
-//Rxx^=lsl(Rss,Rt)
-def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
-
-// Multiply and use upper result
-def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
-def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
-def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
-def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
-def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
-
-def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
-def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
-
-// Vector reduce add unsigned halfwords
-def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
-
-def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
-def: T_P_pat<S2_ct0p,  int_hexagon_S2_ct0p>;
-def: T_P_pat<S2_ct1p,  int_hexagon_S2_ct1p>;
-
-def: T_Q_RR_pat<C4_nbitsset,  int_hexagon_C4_nbitsset>;
-def: T_Q_RR_pat<C4_nbitsclr,  int_hexagon_C4_nbitsclr>;
-def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
-
-def : T_Q_PI_pat<A4_vcmpbeqi,     int_hexagon_A4_vcmpbeqi>;
-def : T_Q_PI_pat<A4_vcmpbgti,     int_hexagon_A4_vcmpbgti>;
-def : T_Q_PI_pat<A4_vcmpbgtui,    int_hexagon_A4_vcmpbgtui>;
-def : T_Q_PI_pat<A4_vcmpheqi,     int_hexagon_A4_vcmpheqi>;
-def : T_Q_PI_pat<A4_vcmphgti,     int_hexagon_A4_vcmphgti>;
-def : T_Q_PI_pat<A4_vcmphgtui,    int_hexagon_A4_vcmphgtui>;
-def : T_Q_PI_pat<A4_vcmpweqi,     int_hexagon_A4_vcmpweqi>;
-def : T_Q_PI_pat<A4_vcmpwgti,     int_hexagon_A4_vcmpwgti>;
-def : T_Q_PI_pat<A4_vcmpwgtui,    int_hexagon_A4_vcmpwgtui>;
-def : T_Q_PP_pat<A4_vcmpbeq_any,  int_hexagon_A4_vcmpbeq_any>;
-
-def : T_Q_RR_pat<A4_cmpbeq,   int_hexagon_A4_cmpbeq>;
-def : T_Q_RR_pat<A4_cmpbgt,   int_hexagon_A4_cmpbgt>;
-def : T_Q_RR_pat<A4_cmpbgtu,  int_hexagon_A4_cmpbgtu>;
-def : T_Q_RR_pat<A4_cmpheq,   int_hexagon_A4_cmpheq>;
-def : T_Q_RR_pat<A4_cmphgt,   int_hexagon_A4_cmphgt>;
-def : T_Q_RR_pat<A4_cmphgtu,  int_hexagon_A4_cmphgtu>;
-
-def : T_Q_RI_pat<A4_cmpbeqi,  int_hexagon_A4_cmpbeqi>;
-def : T_Q_RI_pat<A4_cmpbgti,  int_hexagon_A4_cmpbgti>;
-def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
-
-def : T_Q_RI_pat<A4_cmpheqi,  int_hexagon_A4_cmpheqi>;
-def : T_Q_RI_pat<A4_cmphgti,  int_hexagon_A4_cmphgti>;
-def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
-
-def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
-def : T_Q_PR_pat<A4_tlbmatch,    int_hexagon_A4_tlbmatch>;
-
-def : T_RRR_pat <M4_mpyrr_addr,    int_hexagon_M4_mpyrr_addr>;
-def : T_IRR_pat <M4_mpyrr_addi,    int_hexagon_M4_mpyrr_addi>;
-def : T_IRI_pat <M4_mpyri_addi,    int_hexagon_M4_mpyri_addi>;
-def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
-def : T_RRI_pat <M4_mpyri_addr,    int_hexagon_M4_mpyri_addr>;
-def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
-def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
-
-// Complex multiply 32x16
-def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
-def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
-
-def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
-def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
-
-def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
-def : T_PP_pat<A4_ornp,  int_hexagon_A4_ornp>;
-
-// Complex add/sub halfwords/words
-def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
-def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
-def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
-def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
-
-def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
-def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
-
-// Extract bitfield
-def : T_PP_pat  <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
-def : T_RP_pat  <S4_extract_rp, int_hexagon_S4_extract_rp>;
-def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
-def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
-
-// Shift an immediate left by register amount
-def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
-
-// Vector reduce maximum halfwords
-def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
-def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
-
-// Vector reduce maximum words
-def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
-def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
-
-// Vector reduce minimum halfwords
-def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
-def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
-
-// Vector reduce minimum words
-def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
-def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
-
-// Rotate and reduce bytes
-def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
-                                     u2_0ImmPred:$src3),
-           (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
-                                         IntRegs:$src3, u2_0ImmPred:$src4),
-           (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
-                             IntRegs:$src3, u2_0ImmPred:$src4)>;
-
-// Vector conditional negate
-def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
-
-// Logical xor with xor accumulation
-def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
-
-// ALU64 - Vector min/max byte
-def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
-def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
-
-// Shift and add/sub/and/or
-def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
-def : T_IRI_pat <S4_ori_asl_ri,  int_hexagon_S4_ori_asl_ri>;
-def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
-def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
-def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
-def : T_IRI_pat <S4_ori_lsr_ri,  int_hexagon_S4_ori_lsr_ri>;
-def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
-def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
-
-// Split bitfield
-def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
-def : T_RR_pat <A4_bitsplit,  int_hexagon_A4_bitsplit>;
-
-def: T_RR_pat<S4_parity,      int_hexagon_S4_parity>;
-
-def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
-def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
-
-def: T_RI_pat<S4_clbaddi,     int_hexagon_S4_clbaddi>;
-def: T_PI_pat<S4_clbpaddi,    int_hexagon_S4_clbpaddi>;
-def: T_P_pat <S4_clbpnorm,    int_hexagon_S4_clbpnorm>;
-
-//*******************************************************************
-//            ALU32/ALU
-//*******************************************************************
-
-// ALU32 / ALU / Logical Operations.
-def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
-def: T_RR_pat<A4_orn,  int_hexagon_A4_orn>;
-
-//*******************************************************************
-//            ALU32/PERM
-//*******************************************************************
-
-// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
-
-//*******************************************************************
-//           ALU32/PRED
-//*******************************************************************
-
-// Compare
-def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
-def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
-
-// Compare To General Register.
-def: T_Q_RR_pat<C4_cmpneq,  int_hexagon_C4_cmpneq>;
-def: T_Q_RR_pat<C4_cmplte,  int_hexagon_C4_cmplte>;
-def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
-
-def: T_RR_pat<A4_rcmpeq,  int_hexagon_A4_rcmpeq>;
-def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
-
-def: T_RI_pat<A4_rcmpeqi,  int_hexagon_A4_rcmpeqi>;
-def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
-
-//*******************************************************************
-//           CR
-//*******************************************************************
-
-// CR / Logical Operations On Predicates.
-def: T_Q_QQQ_pat<C4_and_and,  int_hexagon_C4_and_and>;
-def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
-def: T_Q_QQQ_pat<C4_and_or,   int_hexagon_C4_and_or>;
-def: T_Q_QQQ_pat<C4_and_orn,  int_hexagon_C4_and_orn>;
-def: T_Q_QQQ_pat<C4_or_and,   int_hexagon_C4_or_and>;
-def: T_Q_QQQ_pat<C4_or_andn,  int_hexagon_C4_or_andn>;
-def: T_Q_QQQ_pat<C4_or_or,    int_hexagon_C4_or_or>;
-def: T_Q_QQQ_pat<C4_or_orn,   int_hexagon_C4_or_orn>;
-
-//*******************************************************************
-//           XTYPE/ALU
-//*******************************************************************
-
-// Add And Accumulate.
-
-def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
-def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
-
-
-// XTYPE / ALU / Logical-logical Words.
-def : T_RRR_pat <M4_or_xor,   int_hexagon_M4_or_xor>;
-def : T_RRR_pat <M4_and_xor,  int_hexagon_M4_and_xor>;
-def : T_RRR_pat <M4_or_and,   int_hexagon_M4_or_and>;
-def : T_RRR_pat <M4_and_and,  int_hexagon_M4_and_and>;
-def : T_RRR_pat <M4_xor_and,  int_hexagon_M4_xor_and>;
-def : T_RRR_pat <M4_or_or,    int_hexagon_M4_or_or>;
-def : T_RRR_pat <M4_and_or,   int_hexagon_M4_and_or>;
-def : T_RRR_pat <M4_xor_or,   int_hexagon_M4_xor_or>;
-def : T_RRR_pat <M4_or_andn,  int_hexagon_M4_or_andn>;
-def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
-def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
-
-def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
-def : T_RRI_pat <S4_or_andix,  int_hexagon_S4_or_andix>;
-def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
-
-// Modulo wrap.
-def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
-
-// Arithmetic/Convergent round
-// Rd=[cround|round](Rs,Rt)[:sat]
-// Rd=[cround|round](Rs,#u5)[:sat]
-def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
-def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
-
-def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
-def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
-
-def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
-def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
-
-def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@ -7,9 +7,314 @@
 //
 //===----------------------------------------------------------------------===//

+def : T_PR_pat <M2_vrcmpys_s1,     int_hexagon_M2_vrcmpys_s1>;
+def : T_PPR_pat<M2_vrcmpys_acc_s1, int_hexagon_M2_vrcmpys_acc_s1>;
+def : T_PR_pat <M2_vrcmpys_s1rp,   int_hexagon_M2_vrcmpys_s1rp>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat<M2_vradduh, int_hexagon_M2_vradduh>;
+
+def: T_RP_pat<A2_addsp,   int_hexagon_A2_addsp>;
+def: T_PP_pat<A2_addpsat, int_hexagon_A2_addpsat>;
+def: T_PP_pat<A2_minp,    int_hexagon_A2_minp>;
+def: T_PP_pat<A2_minup,   int_hexagon_A2_minup>;
+def: T_PP_pat<A2_maxp,    int_hexagon_A2_maxp>;
+def: T_PP_pat<A2_maxup,   int_hexagon_A2_maxup>;
+
+// Vector reduce multiply word by signed half (32x16)
+//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
+def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
+def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
+
+//Rdd+=vrmpyweh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
+
+//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
+def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
+
+// Vector multiply halfwords, signed by unsigned
+// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
+def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
+
+// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
+def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
+def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
+// Rxx[^]=vpmpyh(Rs,Rt)
+def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
+// Rxx^=pmpyw(Rs,Rt)
+def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
+
+//Rxx^=asr(Rss,Rt)
+def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
+//Rxx^=asl(Rss,Rt)
+def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
+//Rxx^=lsr(Rss,Rt)
+def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
+//Rxx^=lsl(Rss,Rt)
+def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
+
+// Multiply and use upper result
+def : T_RR_pat <M2_mpysu_up, int_hexagon_M2_mpysu_up>;
+def : T_RR_pat <M2_mpy_up_s1, int_hexagon_M2_mpy_up_s1>;
+def : T_RR_pat <M2_hmmpyh_s1, int_hexagon_M2_hmmpyh_s1>;
+def : T_RR_pat <M2_hmmpyl_s1, int_hexagon_M2_hmmpyl_s1>;
+def : T_RR_pat <M2_mpy_up_s1_sat, int_hexagon_M2_mpy_up_s1_sat>;
+
+def : T_PP_pat <A2_vaddub, int_hexagon_A2_vaddb_map>;
+def : T_PP_pat <A2_vsubub, int_hexagon_A2_vsubb_map>;
+
+// Vector reduce add unsigned halfwords
+def : T_PP_pat <M2_vraddh, int_hexagon_M2_vraddh>;
+
+def: T_P_pat<S2_brevp, int_hexagon_S2_brevp>;
+def: T_P_pat<S2_ct0p,  int_hexagon_S2_ct0p>;
+def: T_P_pat<S2_ct1p,  int_hexagon_S2_ct1p>;
+
+def: T_Q_RR_pat<C4_nbitsset,  int_hexagon_C4_nbitsset>;
+def: T_Q_RR_pat<C4_nbitsclr,  int_hexagon_C4_nbitsclr>;
+def: T_Q_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;
+
+def : T_Q_PI_pat<A4_vcmpbeqi,     int_hexagon_A4_vcmpbeqi>;
+def : T_Q_PI_pat<A4_vcmpbgti,     int_hexagon_A4_vcmpbgti>;
+def : T_Q_PI_pat<A4_vcmpbgtui,    int_hexagon_A4_vcmpbgtui>;
+def : T_Q_PI_pat<A4_vcmpheqi,     int_hexagon_A4_vcmpheqi>;
+def : T_Q_PI_pat<A4_vcmphgti,     int_hexagon_A4_vcmphgti>;
+def : T_Q_PI_pat<A4_vcmphgtui,    int_hexagon_A4_vcmphgtui>;
+def : T_Q_PI_pat<A4_vcmpweqi,     int_hexagon_A4_vcmpweqi>;
+def : T_Q_PI_pat<A4_vcmpwgti,     int_hexagon_A4_vcmpwgti>;
+def : T_Q_PI_pat<A4_vcmpwgtui,    int_hexagon_A4_vcmpwgtui>;
+def : T_Q_PP_pat<A4_vcmpbeq_any,  int_hexagon_A4_vcmpbeq_any>;
+
+def : T_Q_RR_pat<A4_cmpbeq,   int_hexagon_A4_cmpbeq>;
+def : T_Q_RR_pat<A4_cmpbgt,   int_hexagon_A4_cmpbgt>;
+def : T_Q_RR_pat<A4_cmpbgtu,  int_hexagon_A4_cmpbgtu>;
+def : T_Q_RR_pat<A4_cmpheq,   int_hexagon_A4_cmpheq>;
+def : T_Q_RR_pat<A4_cmphgt,   int_hexagon_A4_cmphgt>;
+def : T_Q_RR_pat<A4_cmphgtu,  int_hexagon_A4_cmphgtu>;
+
+def : T_Q_RI_pat<A4_cmpbeqi,  int_hexagon_A4_cmpbeqi>;
+def : T_Q_RI_pat<A4_cmpbgti,  int_hexagon_A4_cmpbgti>;
+def : T_Q_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
+
+def : T_Q_RI_pat<A4_cmpheqi,  int_hexagon_A4_cmpheqi>;
+def : T_Q_RI_pat<A4_cmphgti,  int_hexagon_A4_cmphgti>;
+def : T_Q_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
+
+def : T_Q_RP_pat<A4_boundscheck, int_hexagon_A4_boundscheck>;
+def : T_Q_PR_pat<A4_tlbmatch,    int_hexagon_A4_tlbmatch>;
+
+def : T_RRR_pat <M4_mpyrr_addr,    int_hexagon_M4_mpyrr_addr>;
+def : T_IRR_pat <M4_mpyrr_addi,    int_hexagon_M4_mpyrr_addi>;
+def : T_IRI_pat <M4_mpyri_addi,    int_hexagon_M4_mpyri_addi>;
+def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
+def : T_RRI_pat <M4_mpyri_addr,    int_hexagon_M4_mpyri_addr>;
+def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
+def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
+
+// Complex multiply 32x16
+def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
+def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
+
+def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;
+def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
+
+def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;
+def : T_PP_pat<A4_ornp,  int_hexagon_A4_ornp>;
+
+// Complex add/sub halfwords/words
+def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
+def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
+def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
+def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
+
+def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;
+def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
+
+// Extract bitfield
+def : T_PP_pat  <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
+def : T_RP_pat  <S4_extract_rp, int_hexagon_S4_extract_rp>;
+def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
+def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
+
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
+
+// Shift an immediate left by register amount
+def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
+
+// Vector reduce maximum halfwords
+def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
+def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
+
+// Vector reduce maximum words
+def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
+def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
+
+// Vector reduce minimum halfwords
+def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
+def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
+
+// Vector reduce minimum words
+def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
+def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
+
+// Rotate and reduce bytes
+def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
+                                     u2_0ImmPred:$src3),
+           (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
+
+// Rotate and reduce bytes with accumulation
+// Rxx+=vrcrotate(Rss,Rt,#u2)
+def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+                                         IntRegs:$src3, u2_0ImmPred:$src4),
+           (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
+                             IntRegs:$src3, u2_0ImmPred:$src4)>;
+
+// Vector conditional negate
+def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
+
+// Logical xor with xor accumulation
+def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
+
+// ALU64 - Vector min/max byte
+def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
+def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
+
+// Shift and add/sub/and/or
+def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
+def : T_IRI_pat <S4_ori_asl_ri,  int_hexagon_S4_ori_asl_ri>;
+def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
+def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
+def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
+def : T_IRI_pat <S4_ori_lsr_ri,  int_hexagon_S4_ori_lsr_ri>;
+def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
+def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
+
+// Split bitfield
+def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
+def : T_RR_pat <A4_bitsplit,  int_hexagon_A4_bitsplit>;
+
+def: T_RR_pat<S4_parity,      int_hexagon_S4_parity>;
+
+def: T_Q_RI_pat<S4_ntstbit_i, int_hexagon_S4_ntstbit_i>;
+def: T_Q_RR_pat<S4_ntstbit_r, int_hexagon_S4_ntstbit_r>;
+
+def: T_RI_pat<S4_clbaddi,     int_hexagon_S4_clbaddi>;
+def: T_PI_pat<S4_clbpaddi,    int_hexagon_S4_clbpaddi>;
+def: T_P_pat <S4_clbpnorm,    int_hexagon_S4_clbpnorm>;
+
+//*******************************************************************
+//            ALU32/ALU
+//*******************************************************************
+
+// ALU32 / ALU / Logical Operations.
+def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
+def: T_RR_pat<A4_orn,  int_hexagon_A4_orn>;
+
+//*******************************************************************
+//            ALU32/PERM
+//*******************************************************************
+
+// Combine Words Into Doublewords.
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
+
+//*******************************************************************
+//           ALU32/PRED
+//*******************************************************************
+
+// Compare
+def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
+
+// Compare To General Register.
+def: T_Q_RR_pat<C4_cmpneq,  int_hexagon_C4_cmpneq>;
+def: T_Q_RR_pat<C4_cmplte,  int_hexagon_C4_cmplte>;
+def: T_Q_RR_pat<C4_cmplteu, int_hexagon_C4_cmplteu>;
+
+def: T_RR_pat<A4_rcmpeq,  int_hexagon_A4_rcmpeq>;
+def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
+
+def: T_RI_pat<A4_rcmpeqi,  int_hexagon_A4_rcmpeqi>;
+def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
+
+//*******************************************************************
+//           CR
+//*******************************************************************
+
+// CR / Logical Operations On Predicates.
+def: T_Q_QQQ_pat<C4_and_and,  int_hexagon_C4_and_and>;
+def: T_Q_QQQ_pat<C4_and_andn, int_hexagon_C4_and_andn>;
+def: T_Q_QQQ_pat<C4_and_or,   int_hexagon_C4_and_or>;
+def: T_Q_QQQ_pat<C4_and_orn,  int_hexagon_C4_and_orn>;
+def: T_Q_QQQ_pat<C4_or_and,   int_hexagon_C4_or_and>;
+def: T_Q_QQQ_pat<C4_or_andn,  int_hexagon_C4_or_andn>;
+def: T_Q_QQQ_pat<C4_or_or,    int_hexagon_C4_or_or>;
+def: T_Q_QQQ_pat<C4_or_orn,   int_hexagon_C4_or_orn>;
+
+//*******************************************************************
+//           XTYPE/ALU
+//*******************************************************************
+
+// Add And Accumulate.
+
+def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
+def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
+
+
+// XTYPE / ALU / Logical-logical Words.
+def : T_RRR_pat <M4_or_xor,   int_hexagon_M4_or_xor>;
+def : T_RRR_pat <M4_and_xor,  int_hexagon_M4_and_xor>;
+def : T_RRR_pat <M4_or_and,   int_hexagon_M4_or_and>;
+def : T_RRR_pat <M4_and_and,  int_hexagon_M4_and_and>;
+def : T_RRR_pat <M4_xor_and,  int_hexagon_M4_xor_and>;
+def : T_RRR_pat <M4_or_or,    int_hexagon_M4_or_or>;
+def : T_RRR_pat <M4_and_or,   int_hexagon_M4_and_or>;
+def : T_RRR_pat <M4_xor_or,   int_hexagon_M4_xor_or>;
+def : T_RRR_pat <M4_or_andn,  int_hexagon_M4_or_andn>;
+def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
+def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
+
+def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;
+def : T_RRI_pat <S4_or_andix,  int_hexagon_S4_or_andix>;
+def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
+
+// Modulo wrap.
+def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
+
+// Arithmetic/Convergent round
+// Rd=[cround|round](Rs,Rt)[:sat]
+// Rd=[cround|round](Rs,#u5)[:sat]
+def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
+def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
+
+def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
+def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
+
+def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;
+def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
+
+def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;
+
 //Rdd[+]=vrmpybsu(Rss,Rtt)
 //Rdd[+]=vrmpybuu(Rss,Rtt)
-let Predicates = [HasV5]  in {
 def : T_PP_pat  <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
 def : T_PP_pat  <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;

@ -31,7 +336,6 @@ def : T_PRR_pat <M5_vmacbuu, int_hexagon_M5_vmacbuu>;

 // Rd=vaddhub(Rss,Rtt):sat
 def : T_PP_pat <A5_vaddhubs, int_hexagon_A5_vaddhubs>;
-}

 def : T_FF_pat<F2_sfadd, int_hexagon_F2_sfadd>;
 def : T_FF_pat<F2_sfsub, int_hexagon_F2_sfsub>;
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@ -365,38 +365,34 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
 // --(2) Type cast -------------------------------------------------------
 //

-let Predicates = [HasV5] in {
-  def: OpR_R_pat<F2_conv_sf2df,      pf1<fpextend>,   f64, F32>;
-  def: OpR_R_pat<F2_conv_df2sf,      pf1<fpround>,    f32, F64>;
+def: OpR_R_pat<F2_conv_sf2df,      pf1<fpextend>,   f64, F32>;
+def: OpR_R_pat<F2_conv_df2sf,      pf1<fpround>,    f32, F64>;

-  def: OpR_R_pat<F2_conv_w2sf,       pf1<sint_to_fp>, f32, I32>;
-  def: OpR_R_pat<F2_conv_d2sf,       pf1<sint_to_fp>, f32, I64>;
-  def: OpR_R_pat<F2_conv_w2df,       pf1<sint_to_fp>, f64, I32>;
-  def: OpR_R_pat<F2_conv_d2df,       pf1<sint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_w2sf,       pf1<sint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_d2sf,       pf1<sint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_w2df,       pf1<sint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_d2df,       pf1<sint_to_fp>, f64, I64>;

-  def: OpR_R_pat<F2_conv_uw2sf,      pf1<uint_to_fp>, f32, I32>;
-  def: OpR_R_pat<F2_conv_ud2sf,      pf1<uint_to_fp>, f32, I64>;
-  def: OpR_R_pat<F2_conv_uw2df,      pf1<uint_to_fp>, f64, I32>;
-  def: OpR_R_pat<F2_conv_ud2df,      pf1<uint_to_fp>, f64, I64>;
+def: OpR_R_pat<F2_conv_uw2sf,      pf1<uint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_ud2sf,      pf1<uint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_uw2df,      pf1<uint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_ud2df,      pf1<uint_to_fp>, f64, I64>;

-  def: OpR_R_pat<F2_conv_sf2w_chop,  pf1<fp_to_sint>, i32, F32>;
-  def: OpR_R_pat<F2_conv_df2w_chop,  pf1<fp_to_sint>, i32, F64>;
-  def: OpR_R_pat<F2_conv_sf2d_chop,  pf1<fp_to_sint>, i64, F32>;
-  def: OpR_R_pat<F2_conv_df2d_chop,  pf1<fp_to_sint>, i64, F64>;
+def: OpR_R_pat<F2_conv_sf2w_chop,  pf1<fp_to_sint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2w_chop,  pf1<fp_to_sint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2d_chop,  pf1<fp_to_sint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2d_chop,  pf1<fp_to_sint>, i64, F64>;

-  def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
-  def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
-  def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
-  def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
-}
+def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;

 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5] in {
-  def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
-  def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
-  def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
-  def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-}
+def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
+def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
+def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
+def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;

 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
  def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@ -599,31 +595,29 @@ def: OpR_RR_pat<A2_vcmpwgtu,  RevCmp<setult>, v2i1, V2I32>;
 def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         i1,   V2I32>;
 def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         v2i1, V2I32>;

-let Predicates = [HasV5] in {
-  def: OpR_RR_pat<F2_sfcmpeq,   seteq,          i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt,   setgt,          i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge,   setge,          i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpeq,   setoeq,         i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt,   setogt,         i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge,   setoge,         i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setolt>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setole>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setlt>,  i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setle>,  i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpuo,   setuo,          i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq,   seteq,          i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt,   setgt,          i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge,   setge,          i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq,   setoeq,         i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt,   setogt,         i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge,   setoge,         i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setolt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setole>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setlt>,  i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setle>,  i1, F32>;
+def: OpR_RR_pat<F2_sfcmpuo,   setuo,          i1, F32>;

-  def: OpR_RR_pat<F2_dfcmpeq,   seteq,          i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt,   setgt,          i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge,   setge,          i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpeq,   setoeq,         i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt,   setogt,         i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge,   setoge,         i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setolt>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setole>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setlt>,  i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setle>,  i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpuo,   setuo,          i1, F64>;
-}
+def: OpR_RR_pat<F2_dfcmpeq,   seteq,          i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt,   setgt,          i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge,   setge,          i1, F64>;
+def: OpR_RR_pat<F2_dfcmpeq,   setoeq,         i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt,   setogt,         i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge,   setoge,         i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setolt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setole>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setlt>,  i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setle>,  i1, F64>;
+def: OpR_RR_pat<F2_dfcmpuo,   setuo,          i1, F64>;

 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.

@ -746,32 +740,28 @@ class Cmpud<InstHexagon MI>:  T3<C2_or,  F2_dfcmpuo, MI>;
 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;

-let Predicates = [HasV5] in {
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>,  setueq,         i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  setuge,         i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  setugt,         i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  RevCmp<setule>, i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  RevCmp<setult>, i1, F32>;
-  def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune,         i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>,  setueq,         i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  setuge,         i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  setugt,         i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>,  RevCmp<setule>, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>,  RevCmp<setult>, i1, F32>;
+def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune,         i1, F32>;

-  def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>,  setueq,         i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  setuge,         i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  setugt,         i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  RevCmp<setule>, i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  RevCmp<setult>, i1, F64>;
-  def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune,         i1, F64>;
-}
+def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>,  setueq,         i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  setuge,         i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  setugt,         i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  RevCmp<setule>, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  RevCmp<setult>, i1, F64>;
+def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune,         i1, F64>;

-let Predicates = [HasV5] in {
-  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
-  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne,  i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne,  i1, F32>;

-  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
-  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne,  i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne,  i1, F64>;

-  def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto,   i1, F32>;
-  def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto,   i1, F64>;
-}
+def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto,   i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto,   i1, F64>;


 // --(6) Select ----------------------------------------------------------
@ -801,27 +791,25 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
         (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                   (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;

-let Predicates = [HasV5] in {
-  def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
-           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
-  def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
-           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-  def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
-           (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
-  def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
-           (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
-                     (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
+         (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
+         (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
+def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
+         (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
+def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
+         (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
+                   (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;

-  def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
-           (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
-  def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
-           (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
+def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
+         (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
+def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
+         (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;

-  def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
-           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
-  def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
-           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-}
+def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
+         (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
+         (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;

 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
@ -889,7 +877,7 @@ let AddedComplexity = 200 in {
  defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
 }

-let AddedComplexity = 100, Predicates = [HasV5] in {
+let AddedComplexity = 100 in {
  defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
  defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
  defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@ -1014,7 +1002,7 @@ let Predicates = [HasV60] in {
 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
         (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
-         (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
+         (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;

 // Prefer S2_addasl_rrri over S2_asl_i_r_acc.
 let AddedComplexity = 120 in
@ -1191,17 +1179,15 @@ def: Pat<(not  I32:$Rs), (A2_subri -1, I32:$Rs)>;
 def: Pat<(not  I64:$Rs), (A2_notp  I64:$Rs)>;
 def: Pat<(ineg I64:$Rs), (A2_negp  I64:$Rs)>;

-let Predicates = [HasV5] in {
-  def: Pat<(fabs F32:$Rs), (S2_clrbit_i    F32:$Rs, 31)>;
-  def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
+def: Pat<(fabs F32:$Rs), (S2_clrbit_i    F32:$Rs, 31)>;
+def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;

-  def: Pat<(fabs F64:$Rs),
-           (Combinew (S2_clrbit_i (HiReg $Rs), 31),
-                     (i32 (LoReg $Rs)))>;
-  def: Pat<(fneg F64:$Rs),
-           (Combinew (S2_togglebit_i (HiReg $Rs), 31),
-                     (i32 (LoReg $Rs)))>;
-}
+def: Pat<(fabs F64:$Rs),
+         (Combinew (S2_clrbit_i (HiReg $Rs), 31),
+                   (i32 (LoReg $Rs)))>;
+def: Pat<(fneg F64:$Rs),
+         (Combinew (S2_togglebit_i (HiReg $Rs), 31),
+                   (i32 (LoReg $Rs)))>;

 def: Pat<(add I32:$Rs, anyimm:$s16),   (A2_addi   I32:$Rs,  imm:$s16)>;
 def: Pat<(or  I32:$Rs, anyimm:$s10),   (A2_orir   I32:$Rs,  imm:$s10)>;
@ -1267,13 +1253,11 @@ def: OpR_RR_pat<C2_and,       Mul,        v2i1,  V2I1>;
 def: OpR_RR_pat<C2_and,       Mul,        v4i1,  V4I1>;
 def: OpR_RR_pat<C2_and,       Mul,        v8i1,  V8I1>;

-let Predicates = [HasV5] in {
-  def: OpR_RR_pat<F2_sfadd,     pf2<fadd>,    f32, F32>;
-  def: OpR_RR_pat<F2_sfsub,     pf2<fsub>,    f32, F32>;
-  def: OpR_RR_pat<F2_sfmpy,     pf2<fmul>,    f32, F32>;
-  def: OpR_RR_pat<F2_sfmin,     pf2<fminnum>, f32, F32>;
-  def: OpR_RR_pat<F2_sfmax,     pf2<fmaxnum>, f32, F32>;
-}
+def: OpR_RR_pat<F2_sfadd,     pf2<fadd>,    f32, F32>;
+def: OpR_RR_pat<F2_sfsub,     pf2<fsub>,    f32, F32>;
+def: OpR_RR_pat<F2_sfmpy,     pf2<fmul>,    f32, F32>;
+def: OpR_RR_pat<F2_sfmin,     pf2<fminnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfmax,     pf2<fmaxnum>, f32, F32>;

 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
 // over add-add with individual multiplies as inputs.
@ -1506,14 +1490,12 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
         (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;


-let Predicates = [HasV5] in {
-  def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
-           (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
-  def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
-           (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-  def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
-           (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-}
+def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
+         (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
+         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
+         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;


 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@ -1540,14 +1522,12 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),

 // Multiplies two v4i8 vectors.
 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
-         (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
-     Requires<[HasV5]>;
+         (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;

 // Multiplies two v8i8 vectors.
 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
-                   (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
-     Requires<[HasV5]>;
+                   (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;


 // --(10) Bit ------------------------------------------------------------
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@ -118,18 +118,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {

  bool HasEHReturn = MF->getInfo<HexagonMachineFunctionInfo>()->hasEHReturn();

-  switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
-  case Hexagon::ArchEnum::V4:
-  case Hexagon::ArchEnum::V5:
-  case Hexagon::ArchEnum::V55:
-  case Hexagon::ArchEnum::V60:
-  case Hexagon::ArchEnum::V62:
-  case Hexagon::ArchEnum::V65:
-    return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
-  }
-
-  llvm_unreachable("Callee saved registers requested for unknown architecture "
-                   "version");
+  return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
 }


--- a/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td
@ -57,10 +57,10 @@ include "HexagonDepIICScalar.td"
 include "HexagonDepIICHVX.td"

 //===----------------------------------------------------------------------===//
-// V4 Machine Info +
+// V5 Machine Info +
 //===----------------------------------------------------------------------===//

-include "HexagonScheduleV4.td"
+include "HexagonScheduleV5.td"

 // V55 Machine Info +
 include "HexagonScheduleV55.td"
--- a/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
+++ b/llvm/lib/Target/Hexagon/HexagonScheduleV5.td
@ -1,4 +1,4 @@
-//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@ -10,8 +10,8 @@
 def LD_tc_ld_SLOT01 : InstrItinClass;
 def ST_tc_st_SLOT01 : InstrItinClass;

-class HexagonV4PseudoItin {
-  list<InstrItinData> V4PseudoItin_list = [
+class HexagonV5PseudoItin {
+  list<InstrItinData> V5PseudoItin_list = [
    InstrItinData<PSEUDO,     [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
    InstrItinData<PSEUDOM,    [InstrStage<1, [SLOT2, SLOT3], 0>,
                               InstrStage<1, [SLOT2, SLOT3]>]>,
@ -20,27 +20,27 @@ class HexagonV4PseudoItin {
  ];
 }

-def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin {
-  list<InstrItinData> V4Itin_list = [
+def HexagonV5ItinList : DepScalarItinV5, HexagonV5PseudoItin {
+  list<InstrItinData> V5Itin_list = [
    InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
    InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>
  ];
  list<InstrItinData> ItinList =
-    !listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list);
+    !listconcat(V5Itin_list, DepScalarItinV5_list, V5PseudoItin_list);
 }

-def HexagonItinerariesV4 :
+def HexagonItinerariesV5 :
      ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP],
-                           [Hex_FWD], HexagonV4ItinList.ItinList>;
+                           [Hex_FWD], HexagonV5ItinList.ItinList>;

-def HexagonModelV4 : SchedMachineModel {
+def HexagonModelV5 : SchedMachineModel {
  // Max issue per cycle == bundle width.
  let IssueWidth = 4;
-  let Itineraries = HexagonItinerariesV4;
+  let Itineraries = HexagonItinerariesV5;
  let LoadLatency = 1;
  let CompleteModel = 0;
 }

 //===----------------------------------------------------------------------===//
-// Hexagon V4 Resource Definitions -
+// Hexagon V5 Resource Definitions -
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@ -93,7 +93,6 @@ HexagonSubtarget &
 HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
      {"generic", Hexagon::ArchEnum::V60},
-      {"hexagonv4", Hexagon::ArchEnum::V4},
      {"hexagonv5", Hexagon::ArchEnum::V5},
      {"hexagonv55", Hexagon::ArchEnum::V55},
      {"hexagonv60", Hexagon::ArchEnum::V60},
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@ -59,7 +59,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {

 public:
  Hexagon::ArchEnum HexagonArchVersion;
-  Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
+  Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::NoArch;
  CodeGenOpt::Level OptLevel;
  /// True if the target should use Back-Skip-Back scheduling. This is the
  /// default for V60.
@ -158,7 +158,9 @@ public:
  bool useNewValueStores() const { return UseNewValueStores; }
  bool useSmallData() const { return UseSmallData; }

-  bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
+  bool useHVXOps() const {
+    return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
+  }
  bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
  bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }

--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@ -768,7 +768,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,

  // Make sure that for non-POST_INC stores:
  // 1. The only use of reg is DepReg and no other registers.
-  //    This handles V4 base+index registers.
+  //    This handles base+index registers.
  //    The following store can not be dot new.
  //    Eg.   r0 = add(r0, #3)
  //          memw(r1+r0<<#2) = r0
@ -838,11 +838,7 @@ static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
  return false;
 }

-// Check to see if an instruction can be dot new
-// There are three kinds.
-// 1. dot new on predicate - V2/V3/V4
-// 2. dot new on stores NV/ST - V4
-// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
+// Check to see if an instruction can be dot new.
 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
      const TargetRegisterClass* RC) {
@ -1075,9 +1071,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
  if (MI.isInlineAsm() && !ScheduleInlineAsm)
    return true;

-  // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
-  // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
-  // They must not be grouped with other instructions in a packet.
  if (isSchedBarrier(MI))
    return true;

@ -1289,8 +1282,8 @@ bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
  return false;
 }

-bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
-                                                    const MachineInstr &J) {
+bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
+                                                   const MachineInstr &J) {
  bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
  bool StoreI = I.mayStore(), StoreJ = J.mayStore();
  if ((SysI && StoreJ) || (SysJ && StoreI))
@ -1343,10 +1336,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
  if (Dependence)
    return false;

-  // V4 allows dual stores. It does not allow second store, if the first
-  // store is not in SLOT0. New value store, new value jump, dealloc_return
-  // and memop always take SLOT0. Arch spec 3.4.4.2.
-  Dependence = hasV4SpecificDependence(I, J);
+  // Dual-store does not allow second store, if the first store is not
+  // in SLOT0. New value store, new value jump, dealloc_return and memop
+  // always take SLOT0. Arch spec 3.4.4.2.
+  Dependence = hasDualStoreDependence(I, J);
  if (Dependence)
    return false;

@ -1505,10 +1498,10 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
    }

    // For Order dependences:
-    // 1. On V4 or later, volatile loads/stores can be packetized together,
-    //    unless other rules prevent is.
+    // 1. Volatile loads/stores can be packetized together, unless other
+    //    rules prevent is.
    // 2. Store followed by a load is not allowed.
-    // 3. Store followed by a store is only valid on V4 or later.
+    // 3. Store followed by a store is valid.
    // 4. Load followed by any memory operation is allowed.
    if (DepType == SDep::Order) {
      if (!PacketizeVolatiles) {
@ -1555,7 +1548,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
      continue;
    }

-    // For V4, special case ALLOCFRAME. Even though there is dependency
+    // Special case for ALLOCFRAME: even though there is dependency
    // between ALLOCFRAME and subsequent store, allow it to be packetized
    // in a same packet. This implies that the store is using the caller's
    // SP. Hence, offset needs to be updated accordingly.
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@ -149,7 +149,7 @@ protected:
  bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
  bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
  bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
-  bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
+  bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
  bool producesStall(const MachineInstr &MI);
 };

--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@ -634,8 +634,7 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
      return false;
  }

-  if (STI.getCPU().equals_lower("hexagonv4") ||
-      STI.getCPU().equals_lower("hexagonv5") ||
+  if (STI.getCPU().equals_lower("hexagonv5") ||
      STI.getCPU().equals_lower("hexagonv55") ||
      STI.getCPU().equals_lower("hexagonv60")) {
    // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@ -61,8 +61,6 @@ cl::opt<bool> llvm::HexagonDisableDuplex
   cl::desc("Disable looking for duplex instructions for Hexagon"));

 namespace { // These flags are to be deprecated
-cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"),
-                  cl::init(false));
 cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"),
                  cl::init(false));
 cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"),
@ -83,18 +81,18 @@ cl::opt<Hexagon::ArchEnum>
        clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
        clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
        // Sentinal for no value specified
-        clEnumValN(Hexagon::ArchEnum::V5, "", "")),
+        clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
      // Sentinal for flag not present
-      cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional);
+      cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional);
+
 static cl::opt<bool>
-  DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions"));
+  DisableHVX("mno-hvx", cl::Hidden,
+             cl::desc("Disable Hexagon Vector eXtensions"));


 static StringRef DefaultArch = "hexagonv60";

 static StringRef HexagonGetArchVariant() {
-  if (MV4)
-    return "hexagonv4";
  if (MV5)
    return "hexagonv5";
  if (MV55)
@ -123,7 +121,7 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
  return ArchV;
 }

-unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }

 namespace {

@ -279,6 +277,7 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
    Result.push_back(FS);

  switch (EnableHVX) {
+  case Hexagon::ArchEnum::V5:
  case Hexagon::ArchEnum::V55:
    break;
  case Hexagon::ArchEnum::V60:
@ -290,14 +289,14 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
  case Hexagon::ArchEnum::V65:
    Result.push_back("+hvxv65");
    break;
-  case Hexagon::ArchEnum::V5:{
+  case Hexagon::ArchEnum::Generic:{
    Result.push_back(StringSwitch<StringRef>(CPU)
             .Case("hexagonv60", "+hvxv60")
             .Case("hexagonv62", "+hvxv62")
             .Case("hexagonv65", "+hvxv65"));
    break;
  }
-  case Hexagon::ArchEnum::V4:
+  case Hexagon::ArchEnum::NoArch:
    // Sentinal if -mhvx isn't specified
    break;
  }
@ -307,15 +306,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {

 static bool isCPUValid(std::string CPU)
 {
-  std::vector<std::string> table
-  {
-    "generic",
-    "hexagonv4",
-    "hexagonv5",
-    "hexagonv55",
-    "hexagonv60",
-    "hexagonv62",
-    "hexagonv65",
+  std::vector<std::string> table {
+    "generic",    "hexagonv5",  "hexagonv55", "hexagonv60",
+    "hexagonv62", "hexagonv65",
  };

  return std::find(table.begin(), table.end(), CPU) != table.end();
@ -336,8 +329,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
  // Make sure that +hvx-length turns hvx on, and that "hvx" alone
  // turns on hvxvNN, corresponding to the existing ArchVNN.
  FeatureBitset FB = S;
-  unsigned CpuArch = ArchV4;
-  for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) {
+  unsigned CpuArch = ArchV5;
+  for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
    if (!FB.test(F))
      continue;
    CpuArch = F;
@ -402,7 +395,6 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,

 unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
  static std::map<StringRef,unsigned> ElfFlags = {
-    {"hexagonv4",  ELF::EF_HEXAGON_MACH_V4},
    {"hexagonv5",  ELF::EF_HEXAGON_MACH_V5},
    {"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
    {"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
--- a/llvm/test/CodeGen/Hexagon/cfi-late.ll
+++ b/llvm/test/CodeGen/Hexagon/cfi-late.ll
@ -32,8 +32,8 @@ declare i32 @bar(i32, i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2

-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
+attributes #1 = { "target-cpu"="hexagonv5" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }

--- a/llvm/test/CodeGen/Hexagon/double.ll
+++ b/llvm/test/CodeGen/Hexagon/double.ll
@ -1,22 +1,24 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; CHECK: __hexagon_adddf3
 ; CHECK: __hexagon_subdf3

-define void @foo(double* %acc, double %num, double %num2) nounwind {
-entry:
-  %acc.addr = alloca double*, align 4
-  %num.addr = alloca double, align 8
-  %num2.addr = alloca double, align 8
-  store double* %acc, double** %acc.addr, align 4
-  store double %num, double* %num.addr, align 8
-  store double %num2, double* %num2.addr, align 8
-  %0 = load double*, double** %acc.addr, align 4
-  %1 = load double, double* %0
-  %2 = load double, double* %num.addr, align 8
-  %add = fadd double %1, %2
-  %3 = load double, double* %num2.addr, align 8
-  %sub = fsub double %add, %3
-  %4 = load double*, double** %acc.addr, align 4
-  store double %sub, double* %4
+define void @f0(double* %a0, double %a1, double %a2) #0 {
+b0:
+  %v0 = alloca double*, align 4
+  %v1 = alloca double, align 8
+  %v2 = alloca double, align 8
+  store double* %a0, double** %v0, align 4
+  store double %a1, double* %v1, align 8
+  store double %a2, double* %v2, align 8
+  %v3 = load double*, double** %v0, align 4
+  %v4 = load double, double* %v3
+  %v5 = load double, double* %v1, align 8
+  %v6 = fadd double %v4, %v5
+  %v7 = load double, double* %v2, align 8
+  %v8 = fsub double %v6, %v7
+  %v9 = load double*, double** %v0, align 4
+  store double %v8, double* %v9
  ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/float.ll
+++ b/llvm/test/CodeGen/Hexagon/float.ll
@ -1,22 +1,24 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: __hexagon_addsf3
-; CHECK: __hexagon_subsf3
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: sfadd
+; CHECK: sfsub

-define void @foo(float* %acc, float %num, float %num2) nounwind {
-entry:
-  %acc.addr = alloca float*, align 4
-  %num.addr = alloca float, align 4
-  %num2.addr = alloca float, align 4
-  store float* %acc, float** %acc.addr, align 4
-  store float %num, float* %num.addr, align 4
-  store float %num2, float* %num2.addr, align 4
-  %0 = load float*, float** %acc.addr, align 4
-  %1 = load float, float* %0
-  %2 = load float, float* %num.addr, align 4
-  %add = fadd float %1, %2
-  %3 = load float, float* %num2.addr, align 4
-  %sub = fsub float %add, %3
-  %4 = load float*, float** %acc.addr, align 4
-  store float %sub, float* %4
+define void @f0(float* %a0, float %a1, float %a2) #0 {
+b0:
+  %v0 = alloca float*, align 4
+  %v1 = alloca float, align 4
+  %v2 = alloca float, align 4
+  store float* %a0, float** %v0, align 4
+  store float %a1, float* %v1, align 4
+  store float %a2, float* %v2, align 4
+  %v3 = load float*, float** %v0, align 4
+  %v4 = load float, float* %v3
+  %v5 = load float, float* %v1, align 4
+  %v6 = fadd float %v4, %v5
+  %v7 = load float, float* %v2, align 4
+  %v8 = fsub float %v6, %v7
+  %v9 = load float*, float** %v0, align 4
+  store float %v8, float* %v9
  ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
+++ b/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
@ -1,22 +1,24 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: __hexagon_addsf3
-; CHECK: __hexagon_subsf3
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: sfadd
+; CHECK: sfsub

-define void @foo(float* %acc, float %num, float %num2) nounwind {
-entry:
-  %acc.addr = alloca float*, align 4
-  %num.addr = alloca float, align 4
-  %num2.addr = alloca float, align 4
-  store float* %acc, float** %acc.addr, align 4
-  store float %num, float* %num.addr, align 4
-  store float %num2, float* %num2.addr, align 4
-  %0 = load float*, float** %acc.addr, align 4
-  %1 = load float, float* %0
-  %2 = load float, float* %num.addr, align 4
-  %add = fadd float %1, %2
-  %3 = load float, float* %num2.addr, align 4
-  %sub = fsub float %add, %3
-  %4 = load float*, float** %acc.addr, align 4
-  store float %sub, float* %4
+define void @f0(float* %a0, float %a1, float %a2) #0 {
+b0:
+  %v0 = alloca float*, align 4
+  %v1 = alloca float, align 4
+  %v2 = alloca float, align 4
+  store float* %a0, float** %v0, align 4
+  store float %a1, float* %v1, align 4
+  store float %a2, float* %v2, align 4
+  %v3 = load float*, float** %v0, align 4
+  %v4 = load float, float* %v3
+  %v5 = load float, float* %v1, align 4
+  %v6 = fadd float %v4, %v5
+  %v7 = load float, float* %v2, align 4
+  %v8 = fsub float %v6, %v7
+  %v9 = load float*, float** %v0, align 4
+  store float %v8, float* %v9
  ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll
+++ b/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@ -1,51 +1,57 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we generate load instructions with global + offset

-%struct.struc = type { i8, i8, i16, i32 }

-@foo = common global %struct.struc zeroinitializer, align 4
+%s.0 = type { i8, i8, i16, i32 }

-define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind {
-; CHECK: r{{[0-9]+}} = memw(##foo+4)
-entry:
-  %cmp = icmp sgt i32 %val1, %val2
-  br i1 %cmp, label %if.then, label %if.end
+@g0 = common global %s.0 zeroinitializer, align 4

-if.then:                                          ; preds = %entry
-  %0 = load i32, i32* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 3), align 4
-  store i32 %0, i32* %ival, align 4
-  br label %if.end
+; CHECK-LABEL: f0:
+; CHECK: r{{[0-9]+}} = memw(##g0+4)
+define void @f0(i32 %a0, i32 %a1, i32* nocapture %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.end:                                           ; preds = %if.then, %entry
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 3), align 4
+  store i32 %v1, i32* %a2, align 4
+  br label %b2
+
+b2:                                               ; preds = %b1, %b0
  ret void
 }

-define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind {
-; CHECK: r{{[0-9]+}} = memub(##foo+1)
-entry:
-  %cmp = icmp sgt i32 %val1, %val2
-  br i1 %cmp, label %if.then, label %if.end
+; CHECK-LABEL: f1:
+; CHECK: r{{[0-9]+}} = memub(##g0+1)
+define void @f1(i32 %a0, i32 %a1, i8* nocapture %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.then:                                          ; preds = %entry
-  %0 = load i8, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
-  store i8 %0, i8* %ival, align 1
-  br label %if.end
+b1:                                               ; preds = %b0
+  %v1 = load i8, i8* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 1), align 1
+  store i8 %v1, i8* %a2, align 1
+  br label %b2

-if.end:                                           ; preds = %if.then, %entry
+b2:                                               ; preds = %b1, %b0
  ret void
 }

-define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind {
-; CHECK: r{{[0-9]+}} = memuh(##foo+2)
-entry:
-  %cmp = icmp sgt i32 %val1, %val2
-  br i1 %cmp, label %if.then, label %if.end
+; CHECK-LABEL: f2:
+; CHECK: r{{[0-9]+}} = memuh(##g0+2)
+define void @f2(i32 %a0, i32 %a1, i16* %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.then:                                          ; preds = %entry
-  %0 = load i16, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
-  store i16 %0, i16* %ival, align 2
-  br label %if.end
+b1:                                               ; preds = %b0
+  %v1 = load i16, i16* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 2), align 2
+  store i16 %v1, i16* %a2, align 2
+  br label %b2

-if.end:                                           ; preds = %if.then, %entry
+b2:                                               ; preds = %b1, %b0
  ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/gp-plus-offset-store.ll
+++ b/llvm/test/CodeGen/Hexagon/gp-plus-offset-store.ll
@ -1,35 +1,38 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we generate store instructions with global + offset

-%struct.struc = type { i8, i8, i16, i32 }
+%s.0 = type { i8, i8, i16, i32 }

-@foo = common global %struct.struc zeroinitializer, align 4
+@g0 = common global %s.0 zeroinitializer, align 4

-define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind {
-; CHECK: memb(##foo+1) = r{{[0-9]+}}
-entry:
-  %cmp = icmp sgt i32 %val1, %val2
-  br i1 %cmp, label %if.then, label %if.end
+; CHECK-LABEL: f0:
+; CHECK: memb(##g0+1) = r{{[0-9]+}}
+define void @f0(i32 %a0, i32 %a1, i8 zeroext %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.then:                                          ; preds = %entry
-  store i8 %ival, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
-  br label %if.end
+b1:                                               ; preds = %b0
+  store i8 %a2, i8* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 1), align 1
+  br label %b2

-if.end:                                           ; preds = %if.then, %entry
+b2:                                               ; preds = %b1, %b0
  ret void
 }

-define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind {
-; CHECK: memh(##foo+2) = r{{[0-9]+}}
-entry:
-  %cmp = icmp sgt i32 %val1, %val2
-  br i1 %cmp, label %if.then, label %if.end
+; CHECK-LABEL: f1:
+; CHECK: memh(##g0+2) = r{{[0-9]+}}
+define void @f1(i32 %a0, i32 %a1, i16 signext %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.then:                                          ; preds = %entry
-  store i16 %ival, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
-  br label %if.end
+b1:                                               ; preds = %b0
+  store i16 %a2, i16* getelementptr inbounds (%s.0, %s.0* @g0, i32 0, i32 2), align 2
+  br label %b2

-if.end:                                           ; preds = %if.then, %entry
+b2:                                               ; preds = %b1, %b0
  ret void
 }

+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/gp-rel.ll
+++ b/llvm/test/CodeGen/Hexagon/gp-rel.ll
@ -1,33 +1,36 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that gp-relative instructions are being generated.

-@a = common global i32 0, align 4
-@b = common global i32 0, align 4
-@c = common global i32 0, align 4
+; CHECK: r{{[0-9]+}} = memw(gp+#g0)
+; CHECK: r{{[0-9]+}} = memw(gp+#g1)
+; CHECK: if (p{{[0-3]}}) memw(##g2) = r{{[0-9]+}}

-define i32 @foo(i32 %p) #0 {
-entry:
-; CHECK: r{{[0-9]+}} = memw(gp+#a)
-; CHECK: r{{[0-9]+}} = memw(gp+#b)
-; CHECK: if (p{{[0-3]}}) memw(##c) = r{{[0-9]+}}
-  %0 = load i32, i32* @a, align 4
-  %1 = load i32, i32* @b, align 4
-  %add = add nsw i32 %1, %0
-  %cmp = icmp eq i32 %0, %1
-  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
+@g2 = common global i32 0, align 4

-entry.if.end_crit_edge:
-  %.pre = load i32, i32* @c, align 4
-  br label %if.end
+define i32 @f0(i32 %a0) #0 {
+b0:
+  %v0 = load i32, i32* @g0, align 4
+  %v1 = load i32, i32* @g1, align 4
+  %v2 = add nsw i32 %v1, %v0
+  %v3 = icmp eq i32 %v0, %v1
+  br i1 %v3, label %b2, label %b1

-if.then:
-  %add1 = add nsw i32 %add, %0
-  store i32 %add1, i32* @c, align 4
-  br label %if.end
+b1:                                               ; preds = %b0
+  %v4 = load i32, i32* @g2, align 4
+  br label %b3

-if.end:
-  %2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ]
-  %cmp2 = icmp eq i32 %add, %2
-  %sel1 = select i1 %cmp2, i32 %2, i32 %1
-  ret i32 %sel1
+b2:                                               ; preds = %b0
+  %v5 = add nsw i32 %v2, %v0
+  store i32 %v5, i32* @g2, align 4
+  br label %b3
+
+b3:                                               ; preds = %b2, %b1
+  %v6 = phi i32 [ %v4, %b1 ], [ %v5, %b2 ]
+  %v7 = icmp eq i32 %v2, %v6
+  %v8 = select i1 %v7, i32 %v6, i32 %v1
+  ret i32 %v8
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll
@ -1,87 +1,91 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -no-phi-elim-live-out-early-exit \
-; RUN:    < %s | FileCheck %s
+; RUN: llc -march=hexagon -no-phi-elim-live-out-early-exit < %s | FileCheck %s
 ; Check that we remove the compare and induction variable instructions
 ; after generating hardware loops.
 ; Bug 6685.

+; CHECK-LABEL: f0:
 ; CHECK: loop0
 ; CHECK-NOT: r{{[0-9]+}} = add(r{{[0-9]+}},#-1)
 ; CHECK-NOT: cmp.eq
 ; CHECK: endloop0

-define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly {
-entry:
-  %cmp1 = icmp sgt i32 %n, 0
-  br i1 %cmp1, label %for.body.preheader, label %for.end
+define i32 @f0(i32* nocapture %a0, i32 %a1) #0 {
+b0:
+  %v0 = icmp sgt i32 %a1, 0
+  br i1 %v0, label %b1, label %b4

-for.body.preheader:
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
-  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %0 = load i32, i32* %arrayidx.phi, align 4
-  %add = add nsw i32 %0, %sum.03
-  %inc = add nsw i32 %i.02, 1
-  %exitcond = icmp eq i32 %inc, %n
-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %v5, %b2 ], [ 0, %b1 ]
+  %v2 = phi i32* [ %v8, %b2 ], [ %a0, %b1 ]
+  %v3 = phi i32 [ %v6, %b2 ], [ 0, %b1 ]
+  %v4 = load i32, i32* %v2, align 4
+  %v5 = add nsw i32 %v4, %v1
+  %v6 = add nsw i32 %v3, 1
+  %v7 = icmp eq i32 %v6, %a1
+  %v8 = getelementptr i32, i32* %v2, i32 1
+  br i1 %v7, label %b3, label %b2

-for.end.loopexit:
-  br label %for.end
+b3:                                               ; preds = %b2
+  br label %b4

-for.end:
-  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
-  ret i32 %sum.0.lcssa
+b4:                                               ; preds = %b3, %b0
+  %v9 = phi i32 [ 0, %b0 ], [ %v5, %b3 ]
+  ret i32 %v9
 }

 ; This test checks that that initial loop count value is removed.
+; CHECK-LABEL: f1:
 ; CHECK-NOT: ={{.}}#40
 ; CHECK: loop0
 ; CHECK-NOT: r{{[0-9]+}} = add(r{{[0-9]+}},#-1)
 ; CHECK-NOT: cmp.eq
 ; CHECK: endloop0

-define i32 @test2(i32* nocapture %b) nounwind readonly {
-entry:
-  br label %for.body
+define i32 @f1(i32* nocapture %a0) #0 {
+b0:
+  br label %b1

-for.body:
-  %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
-  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i32, i32* %arrayidx.phi, align 4
-  %add = add nsw i32 %0, %sum.02
-  %inc = add nsw i32 %i.01, 1
-  %exitcond = icmp eq i32 %inc, 40
-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  br i1 %exitcond, label %for.end, label %for.body
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32 [ 0, %b0 ], [ %v4, %b1 ]
+  %v1 = phi i32* [ %a0, %b0 ], [ %v7, %b1 ]
+  %v2 = phi i32 [ 0, %b0 ], [ %v5, %b1 ]
+  %v3 = load i32, i32* %v1, align 4
+  %v4 = add nsw i32 %v3, %v0
+  %v5 = add nsw i32 %v2, 1
+  %v6 = icmp eq i32 %v5, 40
+  %v7 = getelementptr i32, i32* %v1, i32 1
+  br i1 %v6, label %b2, label %b1

-for.end:
-  ret i32 %add
+b2:                                               ; preds = %b1
+  ret i32 %v4
 }

 ; This test checks that we don't remove the induction variable since it's used.
+; CHECK-LABEL: f2:
 ; CHECK: loop0
 ; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#1)
 ; CHECK-NOT: cmp.eq
 ; CHECK: endloop0
-define i32 @test3(i32* nocapture %b) nounwind {
-entry:
-  br label %for.body

-for.body:
-  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
-  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  store i32 %i.01, i32* %arrayidx.phi, align 4
-  %inc = add nsw i32 %i.01, 1
-  %exitcond = icmp eq i32 %inc, 40
-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  br i1 %exitcond, label %for.end, label %for.body
+define i32 @f2(i32* nocapture %a0) #1 {
+b0:
+  br label %b1

-for.end:
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32* [ %a0, %b0 ], [ %v4, %b1 ]
+  %v1 = phi i32 [ 0, %b0 ], [ %v2, %b1 ]
+  store i32 %v1, i32* %v0, align 4
+  %v2 = add nsw i32 %v1, 1
+  %v3 = icmp eq i32 %v2, 40
+  %v4 = getelementptr i32, i32* %v0, i32 1
+  br i1 %v3, label %b2, label %b1
+
+b2:                                               ; preds = %b1
  ret i32 0
 }

-
+attributes #0 = { nounwind readonly "target-cpu"="hexagonv5" }
+attributes #1 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/hwloop-const.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-const.ll
@ -1,27 +1,27 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s
-; ModuleID = 'hwloop-const.c'
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: endloop
+
 target triple = "hexagon-unknown-linux-gnu"

-@b = common global [25000 x i32] zeroinitializer, align 8
-@a = common global [25000 x i32] zeroinitializer, align 8
-@c = common global [25000 x i32] zeroinitializer, align 8
+@g0 = common global [25000 x i32] zeroinitializer, align 8
+@g1 = common global [25000 x i32] zeroinitializer, align 8

-define i32 @hwloop_bug() nounwind {
-entry:
-  br label %for.body
+define i32 @f0() #0 {
+b0:
+  br label %b1

-; CHECK: endloop
-for.body:                                         ; preds = %for.body, %entry
-  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds [25000 x i32], [25000 x i32]* @b, i32 0, i32 %i.02
-  store i32 %i.02, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds [25000 x i32], [25000 x i32]* @a, i32 0, i32 %i.02
-  store i32 %i.02, i32* %arrayidx1, align 4
-  %inc = add nsw i32 %i.02, 1
-  %exitcond = icmp eq i32 %inc, 25000
-  br i1 %exitcond, label %for.end, label %for.body
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32 [ 0, %b0 ], [ %v3, %b1 ]
+  %v1 = getelementptr inbounds [25000 x i32], [25000 x i32]* @g0, i32 0, i32 %v0
+  store i32 %v0, i32* %v1, align 4
+  %v2 = getelementptr inbounds [25000 x i32], [25000 x i32]* @g1, i32 0, i32 %v0
+  store i32 %v0, i32* %v2, align 4
+  %v3 = add nsw i32 %v0, 1
+  %v4 = icmp eq i32 %v3, 25000
+  br i1 %v4, label %b2, label %b1

-for.end:                                          ; preds = %for.body
+b2:                                               ; preds = %b1
  ret i32 0
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll
@ -1,63 +1,64 @@
-; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s
-; ModuleID = 'hwloop-dbg.o'
-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
-target triple = "hexagon"
+; RUN: llc < %s -march=hexagon -disable-lsr | FileCheck %s

-define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind !dbg !5 {
-entry:
-  tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !17
-  tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !DIExpression()), !dbg !18
-  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !DIExpression()), !dbg !19
-  br label %for.body, !dbg !19
-
-for.body:                                         ; preds = %for.body, %entry
 ; CHECK:     loop0(
 ; CHECK-NOT: add({{r[0-9]*}}, #
 ; CHECK:     endloop0
-  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
-  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.01, i32 1, !dbg !21
-  tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
-  %0 = load i32, i32* %b.addr.01, align 4, !dbg !21
-  store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
-  %inc = add nsw i32 %i.02, 1, !dbg !26
-  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !DIExpression()), !dbg !26
-  %exitcond = icmp eq i32 %inc, 10, !dbg !19
-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  br i1 %exitcond, label %for.end, label %for.body, !dbg !19

-for.end:                                          ; preds = %for.body
-  ret void, !dbg !27
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @f0(i32* nocapture %a0, i32* nocapture %a1) #0 !dbg !4 {
+b0:
+  call void @llvm.dbg.value(metadata i32* %a0, metadata !10, metadata !DIExpression()), !dbg !14
+  call void @llvm.dbg.value(metadata i32* %a1, metadata !11, metadata !DIExpression()), !dbg !15
+  call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !16
+  br label %b1, !dbg !16
+
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32* [ %a0, %b0 ], [ %v7, %b1 ]
+  %v1 = phi i32 [ 0, %b0 ], [ %v5, %b1 ]
+  %v2 = phi i32* [ %a1, %b0 ], [ %v3, %b1 ]
+  %v3 = getelementptr inbounds i32, i32* %v2, i32 1, !dbg !18
+  call void @llvm.dbg.value(metadata i32* %v3, metadata !11, metadata !DIExpression()), !dbg !18
+  %v4 = load i32, i32* %v2, align 4, !dbg !18
+  store i32 %v4, i32* %v0, align 4, !dbg !18
+  %v5 = add nsw i32 %v1, 1, !dbg !20
+  call void @llvm.dbg.value(metadata i32 %v5, metadata !12, metadata !DIExpression()), !dbg !20
+  %v6 = icmp eq i32 %v5, 10, !dbg !16
+  %v7 = getelementptr i32, i32* %v0, i32 1
+  br i1 %v6, label %b2, label %b1, !dbg !16
+
+b2:                                               ; preds = %b1
+  ret void, !dbg !21
 }

-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1

+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
+attributes #1 = { nounwind readnone speculatable }

 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!29}
+!llvm.module.flags = !{!3}

-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !2, retainedTypes: !2, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "QuIC LLVM Hexagon Clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2)
+!1 = !DIFile(filename: "hwloop-dbg.c", directory: "/test")
 !2 = !{}
-!5 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !28, scope: null, type: !7, retainedNodes: !11)
-!6 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
-!7 = !DISubroutineType(types: !8)
-!8 = !{null, !9, !9}
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !10)
-!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!11 = !{!13, !14, !15}
-!13 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !5, file: !6, type: !9)
-!14 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !5, file: !6, type: !9)
-!15 = !DILocalVariable(name: "i", line: 2, scope: !16, file: !6, type: !10)
-!16 = distinct !DILexicalBlock(line: 1, column: 26, file: !28, scope: !5)
-!17 = !DILocation(line: 1, column: 15, scope: !5)
-!18 = !DILocation(line: 1, column: 23, scope: !5)
-!19 = !DILocation(line: 3, column: 8, scope: !20)
-!20 = distinct !DILexicalBlock(line: 3, column: 3, file: !28, scope: !16)
-!21 = !DILocation(line: 4, column: 5, scope: !22)
-!22 = distinct !DILexicalBlock(line: 3, column: 28, file: !28, scope: !20)
-!26 = !DILocation(line: 3, column: 23, scope: !20)
-!27 = !DILocation(line: 6, column: 1, scope: !16)
-!28 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
-!29 = !{i32 1, !"Debug Info Version", i32 3}
-!30 = !{i32 0}
+!3 = !{i32 1, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "foo", scope: null, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !9)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 32, align: 32)
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10, !11, !12}
+!10 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!11 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 1, type: !7)
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 2, type: !8)
+!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 1, column: 26)
+!14 = !DILocation(line: 1, column: 15, scope: !4)
+!15 = !DILocation(line: 1, column: 23, scope: !4)
+!16 = !DILocation(line: 3, column: 8, scope: !17)
+!17 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
+!18 = !DILocation(line: 4, column: 5, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !17, file: !1, line: 3, column: 28)
+!20 = !DILocation(line: 3, column: 23, scope: !17)
+!21 = !DILocation(line: 6, column: 1, scope: !13)
--- a/llvm/test/CodeGen/Hexagon/hwloop-le.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-le.ll
@ -1,438 +1,408 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s

-
-; CHECK: test_pos1_ir_sle
+; CHECK-LABEL: f0:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 28395, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f0(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 28395, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 28395, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_ir_sle
+; CHECK-LABEL: f1:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 9073, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f1(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 9073, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 9073, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_ir_sle
+; CHECK-LABEL: f2:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 21956, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f2(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 21956, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 21956, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_ir_sle
+; CHECK-LABEL: f3:
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 16782, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f3(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 16782, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 16782, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_ir_sle
+; CHECK-LABEL: f4:
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 19097, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f4(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 19097, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 19097, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos1_ri_sle
+; CHECK-LABEL: f5:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, 14040
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f5(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, 14040
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp sle i32 %inc, 14040
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp sle i32 %v7, 14040
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_ri_sle
+; CHECK-LABEL: f6:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, 13710
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f6(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, 13710
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp sle i32 %inc, 13710
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp sle i32 %v7, 13710
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_ri_sle
+; CHECK-LABEL: f7:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, 9920
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f7(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, 9920
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp sle i32 %inc, 9920
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp sle i32 %v7, 9920
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_ri_sle
+; CHECK-LABEL: f8:
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, 18924
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f8(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, 18924
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp sle i32 %inc, 18924
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp sle i32 %v7, 18924
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_ri_sle
+; CHECK-LABEL: f9:
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, 11812
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f9(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, 11812
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp sle i32 %inc, 11812
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp sle i32 %v7, 11812
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos1_rr_sle
+; CHECK-LABEL: f10:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f10(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_rr_sle
+; CHECK-LABEL: f11:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f11(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_rr_sle
+; CHECK-LABEL: f12:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f12(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_rr_sle
+; CHECK-LABEL: f13:
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f13(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_rr_sle
+; CHECK-LABEL: f14:
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp sle i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f14(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sle i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp sle i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp sle i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/hwloop-ne.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-ne.ll
@ -1,438 +1,408 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O3 < %s | FileCheck %s

-
-; CHECK: test_pos1_ir_ne
+; CHECK-LABEL: f0:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 32623, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f0(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 32623, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 32623, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_ir_ne
+; CHECK-LABEL: f1:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 29554, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f1(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 29554, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 29554, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_ir_ne
+; CHECK-LABEL: f2:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 15692, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f2(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 15692, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 15692, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_ir_ne
+; CHECK-LABEL: f3:
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 10449, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f3(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 10449, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 10449, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_ir_ne
+; CHECK-LABEL: f4:
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 32087, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f4(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 32087, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ 32087, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos1_ri_ne
+; CHECK-LABEL: f5:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, 3472
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f5(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, 3472
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp ne i32 %inc, 3472
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp ne i32 %v7, 3472
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_ri_ne
+; CHECK-LABEL: f6:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, 8730
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f6(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, 8730
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp ne i32 %inc, 8730
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp ne i32 %v7, 8730
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_ri_ne
+; CHECK-LABEL: f7:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, 1493
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f7(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, 1493
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp ne i32 %inc, 1493
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp ne i32 %v7, 1493
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_ri_ne
+; CHECK-LABEL: f8:
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, 1706
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f8(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, 1706
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp ne i32 %inc, 1706
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp ne i32 %v7, 1706
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_ri_ne
+; CHECK-LABEL: f9:
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, 1886
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f9(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, 1886
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp ne i32 %inc, 1886
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp ne i32 %v7, 1886
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos1_rr_ne
+; CHECK-LABEL: f10:
 ; CHECK: loop0
 ; a < b
-define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f10(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 1
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 1
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos2_rr_ne
+; CHECK-LABEL: f11:
 ; CHECK: loop0
 ; a < b
-define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f11(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 2
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 2
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos4_rr_ne
+; CHECK-LABEL: f12:
 ; CHECK: loop0
 ; a < b
-define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f12(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 4
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 4
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos8_rr_ne
+; CHECK-LABEL: f13
 ; CHECK: loop0
 ; a < b
-define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f13(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 8
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 8
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
-; CHECK: test_pos16_rr_ne
+; CHECK-LABEL: f14
 ; CHECK: loop0
 ; a < b
-define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
-entry:
-  %cmp3 = icmp slt i32 %a, %b
-  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+define void @f14(i8* nocapture %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp slt i32 %a1, %a2
+  br i1 %v0, label %b1, label %b3

-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
+b1:                                               ; preds = %b0
+  br label %b2

-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nsw i32 %conv, 1
-  %conv1 = trunc i32 %add to i8
-  store i8 %conv1, i8* %arrayidx, align 1
-  %inc = add nsw i32 %i.04, 16
-  %cmp = icmp ne i32 %inc, %b
-  br i1 %cmp, label %for.body, label %for.end
+b2:                                               ; preds = %b2, %b1
+  %v1 = phi i32 [ %a1, %b1 ], [ %v7, %b2 ]
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i8, i8* %v2, align 1
+  %v4 = zext i8 %v3 to i32
+  %v5 = add nsw i32 %v4, 1
+  %v6 = trunc i32 %v5 to i8
+  store i8 %v6, i8* %v2, align 1
+  %v7 = add nsw i32 %v1, 16
+  %v8 = icmp ne i32 %v7, %a2
+  br i1 %v8, label %b2, label %b3

-for.end:                                          ; preds = %for.body, %entry
+b3:                                               ; preds = %b2, %b0
  ret void
 }

-
-
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i16_VarArg.ll
@ -1,40 +1,36 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: dfcmp

-@a_str = internal constant [8 x i8] c"a = %f\0A\00"
-@b_str = internal constant [8 x i8] c"b = %f\0A\00"
-@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
-@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
-@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
-@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
-@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
-@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
-@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
-@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
-@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
-@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
-@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
-@A = global double 2.000000e+00
-@B = global double 5.000000e+00
+@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
+@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
+@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
+@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
+@g6 = global double 2.000000e+00
+@g7 = global double 5.000000e+00

-declare i32 @printf(i8*, ...)
+declare i32 @f0(i8*, ...) #0

-define i32 @main() {
-        %a = load double, double* @A
-        %b = load double, double* @B
-        %lt_r = fcmp olt double %a, %b
-        %le_r = fcmp ole double %a, %b
-        %gt_r = fcmp ogt double %a, %b
-        %ge_r = fcmp oge double %a, %b
-        %eq_r = fcmp oeq double %a, %b
-        %ne_r = fcmp une double %a, %b
-        %val1 = zext i1 %lt_r to i16
-        %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
-        %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
-        %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
-        %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
-        %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
-        %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
-        call i32 (i8*, ...) @printf( i8* %lt_s, i16 %val1 )
-        ret i32 0
+define i32 @f1() #0 {
+b0:
+  %v0 = load double, double* @g6
+  %v1 = load double, double* @g7
+  %v2 = fcmp olt double %v0, %v1
+  %v3 = fcmp ole double %v0, %v1
+  %v4 = fcmp ogt double %v0, %v1
+  %v5 = fcmp oge double %v0, %v1
+  %v6 = fcmp oeq double %v0, %v1
+  %v7 = fcmp une double %v0, %v1
+  %v8 = zext i1 %v2 to i16
+  %v9 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
+  %v10 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
+  %v11 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
+  %v12 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
+  %v13 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
+  %v14 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
+  %v15 = call i32 (i8*, ...) @f0(i8* %v9, i16 %v8)
+  ret i32 0
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i1_VarArg.ll
@ -1,44 +1,40 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: call __hexagon_{{[_A-Za-z0-9]+}}
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: dfcmp

-@a_str = internal constant [8 x i8] c"a = %f\0A\00"
-@b_str = internal constant [8 x i8] c"b = %f\0A\00"
-@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
-@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
-@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
-@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
-@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
-@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
-@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
-@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
-@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
-@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
-@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
-@A = global double 2.000000e+00
-@B = global double 5.000000e+00
+@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
+@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
+@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
+@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
+@g6 = global double 2.000000e+00
+@g7 = global double 5.000000e+00

-declare i32 @printf(i8*, ...)
+declare i32 @f0(i8*, ...) #0

-define i32 @main() {
-        %a = load double, double* @A
-        %b = load double, double* @B
-        %lt_r = fcmp olt double %a, %b
-        %le_r = fcmp ole double %a, %b
-        %gt_r = fcmp ogt double %a, %b
-        %ge_r = fcmp oge double %a, %b
-        %eq_r = fcmp oeq double %a, %b
-        %ne_r = fcmp une double %a, %b
-        %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
-        %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
-        %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
-        %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
-        %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
-        %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
-        call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r )
-        call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r )
-        call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r )
-        call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r )
-        call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r )
-        call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r )
-        ret i32 0
+define i32 @f1() #0 {
+b0:
+  %v0 = load double, double* @g6
+  %v1 = load double, double* @g7
+  %v2 = fcmp olt double %v0, %v1
+  %v3 = fcmp ole double %v0, %v1
+  %v4 = fcmp ogt double %v0, %v1
+  %v5 = fcmp oge double %v0, %v1
+  %v6 = fcmp oeq double %v0, %v1
+  %v7 = fcmp une double %v0, %v1
+  %v8 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
+  %v9 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
+  %v10 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
+  %v11 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
+  %v12 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
+  %v13 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
+  %v14 = call i32 (i8*, ...) @f0(i8* %v8, i1 %v2)
+  %v15 = call i32 (i8*, ...) @f0(i8* %v9, i1 %v3)
+  %v16 = call i32 (i8*, ...) @f0(i8* %v10, i1 %v4)
+  %v17 = call i32 (i8*, ...) @f0(i8* %v11, i1 %v5)
+  %v18 = call i32 (i8*, ...) @f0(i8* %v12, i1 %v6)
+  %v19 = call i32 (i8*, ...) @f0(i8* %v13, i1 %v7)
+  ret i32 0
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i8_VarArg.ll
@ -1,40 +1,36 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: dfcmp

-@a_str = internal constant [8 x i8] c"a = %f\0A\00"
-@b_str = internal constant [8 x i8] c"b = %f\0A\00"
-@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
-@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
-@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
-@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
-@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
-@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
-@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
-@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
-@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
-@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
-@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
-@A = global double 2.000000e+00
-@B = global double 5.000000e+00
+@g0 = internal constant [12 x i8] c"a < b = %d\0A\00"
+@g1 = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@g2 = internal constant [12 x i8] c"a > b = %d\0A\00"
+@g3 = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@g4 = internal constant [13 x i8] c"a == b = %d\0A\00"
+@g5 = internal constant [13 x i8] c"a != b = %d\0A\00"
+@g6 = global double 2.000000e+00
+@g7 = global double 5.000000e+00

-declare i32 @printf(i8*, ...)
+declare i32 @f0(i8*, ...) #0

-define i32 @main() {
-        %a = load double, double* @A
-        %b = load double, double* @B
-        %lt_r = fcmp olt double %a, %b
-        %le_r = fcmp ole double %a, %b
-        %gt_r = fcmp ogt double %a, %b
-        %ge_r = fcmp oge double %a, %b
-        %eq_r = fcmp oeq double %a, %b
-        %ne_r = fcmp une double %a, %b
-        %val1 = zext i1 %lt_r to i8
-        %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
-        %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
-        %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
-        %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
-        %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
-        %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
-        call i32 (i8*, ...) @printf( i8* %lt_s, i8 %val1 )
-        ret i32 0
+define i32 @f1() #0 {
+b0:
+  %v0 = load double, double* @g6
+  %v1 = load double, double* @g7
+  %v2 = fcmp olt double %v0, %v1
+  %v3 = fcmp ole double %v0, %v1
+  %v4 = fcmp ogt double %v0, %v1
+  %v5 = fcmp oge double %v0, %v1
+  %v6 = fcmp oeq double %v0, %v1
+  %v7 = fcmp une double %v0, %v1
+  %v8 = zext i1 %v2 to i8
+  %v9 = getelementptr [12 x i8], [12 x i8]* @g0, i64 0, i64 0
+  %v10 = getelementptr [13 x i8], [13 x i8]* @g1, i64 0, i64 0
+  %v11 = getelementptr [12 x i8], [12 x i8]* @g2, i64 0, i64 0
+  %v12 = getelementptr [13 x i8], [13 x i8]* @g3, i64 0, i64 0
+  %v13 = getelementptr [13 x i8], [13 x i8]* @g4, i64 0, i64 0
+  %v14 = getelementptr [13 x i8], [13 x i8]* @g5, i64 0, i64 0
+  %v15 = call i32 (i8*, ...) @f0(i8* %v9, i8 %v8)
+  ret i32 0
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/macint.ll
+++ b/llvm/test/CodeGen/Hexagon/macint.ll
@ -1,14 +1,15 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4  < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we generate integer multiply accumulate.

 ; CHECK: r{{[0-9]+}} {{\+|\-}}= mpyi(r{{[0-9]+}},

-define i32 @main(i32* %a, i32* %b) nounwind {
-  entry:
-  %0 = load i32, i32* %a, align 4
-  %div = udiv i32 %0, 10000
-  %rem = urem i32 %div, 10
-  store i32 %rem, i32* %b, align 4
+define i32 @f0(i32* %a0, i32* %a1) #0 {
+b0:
+  %v0 = load i32, i32* %a0, align 4
+  %v1 = udiv i32 %v0, 10000
+  %v2 = urem i32 %v1, 10
+  store i32 %v2, i32* %a1, align 4
  ret i32 0
 }

+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/misaligned-access.ll
+++ b/llvm/test/CodeGen/Hexagon/misaligned-access.ll
@ -1,16 +1,19 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s
+; RUN: llc -march=hexagon < %s
 ; Check that the mis-aligned load doesn't cause compiler to assert.

-declare i32 @_hi(i64) #1
-@temp1 = common global i32 0, align 4
+@g0 = common global i32 0, align 4

-define i32 @CSDRSEARCH_executeSearchManager() #0 {
-entry:
-  %temp = alloca i32, align 4
-  %0 = load i32, i32* @temp1, align 4
-  store i32 %0, i32* %temp, align 4
-  %1 = bitcast i32* %temp to i64*
-  %2 = load i64, i64* %1, align 8
-  %call = call i32 @_hi(i64 %2)
-  ret i32 %call
+declare i32 @f0(i64) #0
+
+define i32 @f1() #0 {
+b0:
+  %v0 = alloca i32, align 4
+  %v1 = load i32, i32* @g0, align 4
+  store i32 %v1, i32* %v0, align 4
+  %v2 = bitcast i32* %v0 to i64*
+  %v3 = load i64, i64* %v2, align 8
+  %v4 = call i32 @f0(i64 %v3)
+  ret i32 %v4
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/mpy.ll
+++ b/llvm/test/CodeGen/Hexagon/mpy.ll
@ -1,19 +1,21 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; CHECK: += mpyi

-define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
-entry:
-  %acc.addr = alloca i32, align 4
-  %num.addr = alloca i32, align 4
-  %num2.addr = alloca i32, align 4
-  store i32 %acc, i32* %acc.addr, align 4
-  store i32 %num, i32* %num.addr, align 4
-  store i32 %num2, i32* %num2.addr, align 4
-  %0 = load i32, i32* %num.addr, align 4
-  %1 = load i32, i32* %acc.addr, align 4
-  %mul = mul nsw i32 %0, %1
-  %2 = load i32, i32* %num2.addr, align 4
-  %add = add nsw i32 %mul, %2
-  store i32 %add, i32* %num.addr, align 4
+define void @f0(i32 %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = alloca i32, align 4
+  %v1 = alloca i32, align 4
+  %v2 = alloca i32, align 4
+  store i32 %a0, i32* %v0, align 4
+  store i32 %a1, i32* %v1, align 4
+  store i32 %a2, i32* %v2, align 4
+  %v3 = load i32, i32* %v1, align 4
+  %v4 = load i32, i32* %v0, align 4
+  %v5 = mul nsw i32 %v3, %v4
+  %v6 = load i32, i32* %v2, align 4
+  %v7 = add nsw i32 %v5, %v6
+  store i32 %v7, i32* %v1, align 4
  ret void
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/newvaluejump.ll
+++ b/llvm/test/CodeGen/Hexagon/newvaluejump.ll
@ -1,33 +1,36 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we generate new value jump.

-@i = global i32 0, align 4
-@j = global i32 10, align 4
-
-define i32 @foo(i32 %a) nounwind {
-entry:
 ; CHECK: if (cmp.eq(r{{[0-9]+}}.new,#0)) jump{{.}}
-  %addr1 = alloca i32, align 4
-  %addr2 = alloca i32, align 4
-  %0 = load i32, i32* @i, align 4
-  store i32 %0, i32* %addr1, align 4
-  call void @bar(i32 1, i32 2)
-  %1 = load i32, i32* @j, align 4
-  %tobool = icmp ne i32 %1, 0
-  br i1 %tobool, label %if.then, label %if.else

-if.then:
-  call void @baz(i32 1, i32 2)
-  br label %if.end
+@g0 = global i32 0, align 4
+@g1 = global i32 10, align 4

-if.else:
-  call void @guy(i32 10, i32 20)
-  br label %if.end
+define i32 @f0(i32 %a0) #0 {
+b0:
+  %v0 = alloca i32, align 4
+  %v1 = alloca i32, align 4
+  %v2 = load i32, i32* @g0, align 4
+  store i32 %v2, i32* %v0, align 4
+  call void @f2(i32 1, i32 2)
+  %v3 = load i32, i32* @g1, align 4
+  %v4 = icmp ne i32 %v3, 0
+  br i1 %v4, label %b1, label %b2

-if.end:
+b1:                                               ; preds = %b0
+  call void @f3(i32 1, i32 2)
+  br label %b3
+
+b2:                                               ; preds = %b0
+  call void @f1(i32 10, i32 20)
+  br label %b3
+
+b3:                                               ; preds = %b2, %b1
  ret i32 0
 }

-declare void @guy(i32, i32)
-declare void @bar(i32, i32)
-declare void @baz(i32, i32)
+declare void @f1(i32, i32) #0
+declare void @f2(i32, i32) #0
+declare void @f3(i32, i32) #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/packetize_cond_inst.ll
+++ b/llvm/test/CodeGen/Hexagon/packetize_cond_inst.ll
@ -1,10 +1,8 @@
-; RUN: llc -mcpu=hexagonv4 -tail-dup-size=1 < %s | FileCheck %s
+; RUN: llc -march=hexagon -tail-dup-size=1 < %s | FileCheck %s

-target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
 target triple = "hexagon-unknown--elf"

 ; Make sure we put the two conditionally executed adds in a packet.
-; ifcnv_add:
 ;     {
 ;       p0 = cmp.gt(r2, r1)
 ;       if (!p0.new) r0 = add(r2, r1)
@ -13,20 +11,23 @@ target triple = "hexagon-unknown--elf"
 ; CHECK: cmp
 ; CHECK-NEXT: add
 ; CHECK-NEXT: add
-define i32 @ifcnv_add(i32, i32, i32) nounwind readnone {
-  %4 = icmp sgt i32 %2, %1
-  br i1 %4, label %5, label %7
+define i32 @f0(i32 %a0, i32 %a1, i32 %a2) #0 {
+b0:
+  %v0 = icmp sgt i32 %a2, %a1
+  br i1 %v0, label %b1, label %b2

-; <label>:5                                       ; preds = %3
-  %6 = add nsw i32 %0, 10
-  br label %9
+b1:                                               ; preds = %b0
+  %v1 = add nsw i32 %a0, 10
+  br label %b3

-; <label>:7                                       ; preds = %3
-  %8 = add nsw i32 %2, %1
-  br label %9
+b2:                                               ; preds = %b0
+  %v2 = add nsw i32 %a2, %a1
+  br label %b3

-; <label>:9                                       ; preds = %7, %5
-  %10 = phi i32 [ %6, %5 ], [ %8, %7 ]
-  %11 = add nsw i32 %10, 1
-  ret i32 %11
+b3:                                               ; preds = %b2, %b1
+  %v3 = phi i32 [ %v1, %b1 ], [ %v2, %b2 ]
+  %v4 = add nsw i32 %v3, 1
+  ret i32 %v4
 }
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/postinc-load.ll
+++ b/llvm/test/CodeGen/Hexagon/postinc-load.ll
@ -1,29 +1,30 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s

 ; Check that post-increment load instructions are being generated.
 ; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}++#4)

-define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
-entry:
-  br label %for.body
+define i32 @f0(i32* nocapture %a0, i16* nocapture %a1, i32 %a2) #0 {
+b0:
+  br label %b1

-for.body:
-  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
-  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
-  %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
-  %sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
-  %0 = load i32, i32* %arrayidx.phi, align 4
-  %1 = load i16, i16* %arrayidx1.phi, align 2
-  %conv = sext i16 %1 to i32
-  %add = add i32 %0, %sum.03
-  %add2 = add i32 %add, %conv
-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  %arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
-  %lsr.iv.next = add i32 %lsr.iv, -1
-  %exitcond = icmp eq i32 %lsr.iv.next, 0
-  br i1 %exitcond, label %for.end, label %for.body
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32 [ %v11, %b1 ], [ 10, %b0 ]
+  %v1 = phi i32* [ %a0, %b0 ], [ %v9, %b1 ]
+  %v2 = phi i16* [ %a1, %b0 ], [ %v10, %b1 ]
+  %v3 = phi i32 [ 0, %b0 ], [ %v8, %b1 ]
+  %v4 = load i32, i32* %v1, align 4
+  %v5 = load i16, i16* %v2, align 2
+  %v6 = sext i16 %v5 to i32
+  %v7 = add i32 %v4, %v3
+  %v8 = add i32 %v7, %v6
+  %v9 = getelementptr i32, i32* %v1, i32 1
+  %v10 = getelementptr i16, i16* %v2, i32 1
+  %v11 = add i32 %v0, -1
+  %v12 = icmp eq i32 %v11, 0
+  br i1 %v12, label %b2, label %b1

-for.end:
-  ret i32 %add2
+b2:                                               ; preds = %b1
+  ret i32 %v8
 }

+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/postinc-store.ll
+++ b/llvm/test/CodeGen/Hexagon/postinc-store.ll
@ -1,29 +1,30 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s

 ; Check that post-increment store instructions are being generated.
 ; CHECK: memw(r{{[0-9]+}}++#4) = r{{[0-9]+}}

-define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
-entry:
-  br label %for.body
+define i32 @f0(i32* nocapture %a0, i16* nocapture %a1, i32 %a2) #0 {
+b0:
+  br label %b1

-for.body:                                         ; preds = %for.body, %entry
-  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
-  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
-  %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
-  %0 = load i32, i32* %arrayidx.phi, align 4
-  %1 = load i16, i16* %arrayidx1.phi, align 2
-  %conv = sext i16 %1 to i32
-  %factor = mul i32 %0, 2
-  %add3 = add i32 %factor, %conv
-  store i32 %add3, i32* %arrayidx.phi, align 4
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32 [ %v10, %b1 ], [ 10, %b0 ]
+  %v1 = phi i32* [ %a0, %b0 ], [ %v8, %b1 ]
+  %v2 = phi i16* [ %a1, %b0 ], [ %v9, %b1 ]
+  %v3 = load i32, i32* %v1, align 4
+  %v4 = load i16, i16* %v2, align 2
+  %v5 = sext i16 %v4 to i32
+  %v6 = mul i32 %v3, 2
+  %v7 = add i32 %v6, %v5
+  store i32 %v7, i32* %v1, align 4
+  %v8 = getelementptr i32, i32* %v1, i32 1
+  %v9 = getelementptr i16, i16* %v2, i32 1
+  %v10 = add i32 %v0, -1
+  %v11 = icmp eq i32 %v10, 0
+  br i1 %v11, label %b2, label %b1

-  %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
-  %arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
-  %lsr.iv.next = add i32 %lsr.iv, -1
-  %exitcond = icmp eq i32 %lsr.iv.next, 0
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:                                          ; preds = %for.body
+b2:                                               ; preds = %b1
  ret i32 0
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/pred-gp.ll
+++ b/llvm/test/CodeGen/Hexagon/pred-gp.ll
@ -1,28 +1,30 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we are able to predicate instructions with gp-relative
 ; addressing mode.

-@d = external global i32
-@c = common global i32 0, align 4
+; CHECK: if ({{!?}}p{{[0-3]+}}{{(.new)?}}) r{{[0-9]+}} = memw(##g{{[01]}})
+; CHECK: if ({{!?}}p{{[0-3]+}}) r{{[0-9]+}} = memw(##g{{[01]}})

-; Function Attrs: nounwind
-define i32 @test2(i8 zeroext %a, i8 zeroext %b) #0 {
-; CHECK: if ({{!?}}p{{[0-3]+}}{{(.new)?}}) r{{[0-9]+}} = memw(##{{[cd]}})
-; CHECK: if ({{!?}}p{{[0-3]+}}) r{{[0-9]+}} = memw(##{{[cd]}})
-entry:
-  %cmp = icmp eq i8 %a, %b
-  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+@g0 = external global i32
+@g1 = common global i32 0, align 4

-entry.if.end_crit_edge:
-  %.pre = load i32, i32* @c, align 4
-  br label %if.end
+define i32 @f0(i8 zeroext %a0, i8 zeroext %a1) #0 {
+b0:
+  %v0 = icmp eq i8 %a0, %a1
+  br i1 %v0, label %b2, label %b1

-if.then:
-  %0 = load i32, i32* @d, align 4
-  store i32 %0, i32* @c, align 4
-  br label %if.end
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* @g1, align 4
+  br label %b3

-if.end:
-  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %0, %if.then ]
-  ret i32 %1
+b2:                                               ; preds = %b0
+  %v2 = load i32, i32* @g0, align 4
+  store i32 %v2, i32* @g1, align 4
+  br label %b3
+
+b3:                                               ; preds = %b2, %b1
+  %v3 = phi i32 [ %v1, %b1 ], [ %v2, %b2 ]
+  ret i32 %v3
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/pred-instrs.ll
+++ b/llvm/test/CodeGen/Hexagon/pred-instrs.ll
@ -1,30 +1,32 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; Check that we are able to predicate instructions.

 ; CHECK: if ({{!?}}p{{[0-3]}}{{(.new)?}}) r{{[0-9]+}} = {{and|aslh}}
 ; CHECK: if ({{!?}}p{{[0-3]}}{{(.new)?}}) r{{[0-9]+}} = {{and|aslh}}
-@a = external global i32
-@d = external global i32

-; Function Attrs: nounwind
-define i32 @test1(i8 zeroext %la, i8 zeroext %lb) {
-entry:
-  %cmp = icmp eq i8 %la, %lb
-  br i1 %cmp, label %if.then, label %if.else
+@g0 = external global i32
+@g1 = external global i32

-if.then:                                          ; preds = %entry
-  %conv1 = zext i8 %la to i32
-  %shl = shl nuw nsw i32 %conv1, 16
-  br label %if.end
+define i32 @f0(i8 zeroext %a0, i8 zeroext %a1) #0 {
+b0:
+  %v0 = icmp eq i8 %a0, %a1
+  br i1 %v0, label %b1, label %b2

-if.else:                                          ; preds = %entry
-  %and8 = and i8 %lb, %la
-  %and = zext i8 %and8 to i32
-  br label %if.end
+b1:                                               ; preds = %b0
+  %v1 = zext i8 %a0 to i32
+  %v2 = shl nuw nsw i32 %v1, 16
+  br label %b3

-if.end:                                           ; preds = %if.else, %if.then
-  %storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
-  store i32 %storemerge, i32* @a, align 4
-  %0 = load i32, i32* @d, align 4
-  ret i32 %0
+b2:                                               ; preds = %b0
+  %v3 = and i8 %a1, %a0
+  %v4 = zext i8 %v3 to i32
+  br label %b3
+
+b3:                                               ; preds = %b2, %b1
+  %v5 = phi i32 [ %v4, %b2 ], [ %v2, %b1 ]
+  store i32 %v5, i32* @g0, align 4
+  %v6 = load i32, i32* @g1, align 4
+  ret i32 %v6
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/predicate-copy.ll
+++ b/llvm/test/CodeGen/Hexagon/predicate-copy.ll
@ -1,8 +1,10 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O3 < %s | FileCheck %s

 ; CHECK: r{{[0-9]+}} = p{{[0-9]+}}
-define i1 @foo() {
-entry:
+
+define i1 @f0() #0 {
+b0:
  ret i1 false
 }

+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/remove_lsr.ll
+++ b/llvm/test/CodeGen/Hexagon/remove_lsr.ll
@ -1,6 +1,6 @@
 ; Test fix for PR-13709.
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: foo
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: f0
 ; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)
 ; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)

@ -13,64 +13,64 @@
 ; This makes the lsr instruction dead and it gets removed subsequently
 ; by a dead code removal pass.

-%union.vect64 = type { i64 }
-%union.vect32 = type { i32 }

-define void @foo(%union.vect64* nocapture %sss_extracted_bit_rx_data_ptr,
- %union.vect32* nocapture %s_even, %union.vect32* nocapture %s_odd,
- i8* nocapture %scr_s_even_code_ptr, i8* nocapture %scr_s_odd_code_ptr)
- nounwind {
-entry:
-  %scevgep = getelementptr %union.vect64, %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1
-  %scevgep28 = getelementptr %union.vect32, %union.vect32* %s_odd, i32 1
-  %scevgep32 = getelementptr %union.vect32, %union.vect32* %s_even, i32 1
-  %scevgep36 = getelementptr i8, i8* %scr_s_odd_code_ptr, i32 1
-  %scevgep39 = getelementptr i8, i8* %scr_s_even_code_ptr, i32 1
-  br label %for.body
+%s.0 = type { i64 }
+%s.1 = type { i32 }

-for.body:                                         ; preds = %for.body, %entry
-  %lsr.iv42 = phi i32 [ %lsr.iv.next, %for.body ], [ 2, %entry ]
-  %lsr.iv40 = phi i8* [ %scevgep41, %for.body ], [ %scevgep39, %entry ]
-  %lsr.iv37 = phi i8* [ %scevgep38, %for.body ], [ %scevgep36, %entry ]
-  %lsr.iv33 = phi %union.vect32* [ %scevgep34, %for.body ], [ %scevgep32, %entry ]
-  %lsr.iv29 = phi %union.vect32* [ %scevgep30, %for.body ], [ %scevgep28, %entry ]
-  %lsr.iv = phi %union.vect64* [ %scevgep26, %for.body ], [ %scevgep, %entry ]
-  %predicate_1.023 = phi i8 [ undef, %entry ], [ %10, %for.body ]
-  %predicate.022 = phi i8 [ undef, %entry ], [ %9, %for.body ]
-  %val.021 = phi i64 [ undef, %entry ], [ %srcval, %for.body ]
-  %lsr.iv3335 = bitcast %union.vect32* %lsr.iv33 to i32*
-  %lsr.iv2931 = bitcast %union.vect32* %lsr.iv29 to i32*
-  %lsr.iv27 = bitcast %union.vect64* %lsr.iv to i64*
-  %0 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %val.021)
-  %conv3 = sext i8 %predicate.022 to i32
-  %1 = trunc i64 %val.021 to i32
-  %2 = trunc i64 %0 to i32
-  %3 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv3, i32 %1, i32 %2)
-  store i32 %3, i32* %lsr.iv3335, align 4
-  %conv8 = sext i8 %predicate_1.023 to i32
-  %4 = lshr i64 %val.021, 32
-  %5 = trunc i64 %4 to i32
-  %6 = lshr i64 %0, 32
-  %7 = trunc i64 %6 to i32
-  %8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
-  store i32 %8, i32* %lsr.iv2931, align 4
-  %srcval = load i64, i64* %lsr.iv27, align 8
-  %9 = load i8, i8* %lsr.iv40, align 1
-  %10 = load i8, i8* %lsr.iv37, align 1
-  %lftr.wideiv = trunc i32 %lsr.iv42 to i8
-  %exitcond = icmp eq i8 %lftr.wideiv, 32
-  %scevgep26 = getelementptr %union.vect64, %union.vect64* %lsr.iv, i32 1
-  %scevgep30 = getelementptr %union.vect32, %union.vect32* %lsr.iv29, i32 1
-  %scevgep34 = getelementptr %union.vect32, %union.vect32* %lsr.iv33, i32 1
-  %scevgep38 = getelementptr i8, i8* %lsr.iv37, i32 1
-  %scevgep41 = getelementptr i8, i8* %lsr.iv40, i32 1
-  %lsr.iv.next = add i32 %lsr.iv42, 1
-  br i1 %exitcond, label %for.end, label %for.body
+define void @f0(%s.0* nocapture %a0, %s.1* nocapture %a1, %s.1* nocapture %a2, i8* nocapture %a3, i8* nocapture %a4) #0 {
+b0:
+  %v0 = getelementptr %s.0, %s.0* %a0, i32 1
+  %v1 = getelementptr %s.1, %s.1* %a2, i32 1
+  %v2 = getelementptr %s.1, %s.1* %a1, i32 1
+  %v3 = getelementptr i8, i8* %a4, i32 1
+  %v4 = getelementptr i8, i8* %a3, i32 1
+  br label %b1

-for.end:                                          ; preds = %for.body
+b1:                                               ; preds = %b1, %b0
+  %v5 = phi i32 [ %v38, %b1 ], [ 2, %b0 ]
+  %v6 = phi i8* [ %v37, %b1 ], [ %v4, %b0 ]
+  %v7 = phi i8* [ %v36, %b1 ], [ %v3, %b0 ]
+  %v8 = phi %s.1* [ %v35, %b1 ], [ %v2, %b0 ]
+  %v9 = phi %s.1* [ %v34, %b1 ], [ %v1, %b0 ]
+  %v10 = phi %s.0* [ %v33, %b1 ], [ %v0, %b0 ]
+  %v11 = phi i8 [ undef, %b0 ], [ %v30, %b1 ]
+  %v12 = phi i8 [ undef, %b0 ], [ %v29, %b1 ]
+  %v13 = phi i64 [ undef, %b0 ], [ %v28, %b1 ]
+  %v14 = bitcast %s.1* %v8 to i32*
+  %v15 = bitcast %s.1* %v9 to i32*
+  %v16 = bitcast %s.0* %v10 to i64*
+  %v17 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %v13)
+  %v18 = sext i8 %v12 to i32
+  %v19 = trunc i64 %v13 to i32
+  %v20 = trunc i64 %v17 to i32
+  %v21 = tail call i32 @llvm.hexagon.C2.mux(i32 %v18, i32 %v19, i32 %v20)
+  store i32 %v21, i32* %v14, align 4
+  %v22 = sext i8 %v11 to i32
+  %v23 = lshr i64 %v13, 32
+  %v24 = trunc i64 %v23 to i32
+  %v25 = lshr i64 %v17, 32
+  %v26 = trunc i64 %v25 to i32
+  %v27 = tail call i32 @llvm.hexagon.C2.mux(i32 %v22, i32 %v24, i32 %v26)
+  store i32 %v27, i32* %v15, align 4
+  %v28 = load i64, i64* %v16, align 8
+  %v29 = load i8, i8* %v6, align 1
+  %v30 = load i8, i8* %v7, align 1
+  %v31 = trunc i32 %v5 to i8
+  %v32 = icmp eq i8 %v31, 32
+  %v33 = getelementptr %s.0, %s.0* %v10, i32 1
+  %v34 = getelementptr %s.1, %s.1* %v9, i32 1
+  %v35 = getelementptr %s.1, %s.1* %v8, i32 1
+  %v36 = getelementptr i8, i8* %v7, i32 1
+  %v37 = getelementptr i8, i8* %v6, i32 1
+  %v38 = add i32 %v5, 1
+  br i1 %v32, label %b2, label %b1
+
+b2:                                               ; preds = %b1
  ret void
 }

-declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) nounwind readnone
+declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) #1
+declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) #1

-declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) nounwind readnone
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
+attributes #1 = { nounwind readnone "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/simpletailcall.ll
+++ b/llvm/test/CodeGen/Hexagon/simpletailcall.ll
@ -1,14 +1,16 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: foo_empty
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: f0
 ; CHECK-NOT: allocframe
 ; CHECK-NOT: memd(r29
-; CHECK: jump bar_empty
+; CHECK: jump f1

-define void @foo_empty(i32 %h) nounwind {
-entry:
-  %add = add nsw i32 %h, 3
-  %call = tail call i32 bitcast (i32 (...)* @bar_empty to i32 (i32)*)(i32 %add) nounwind
+define void @f0(i32 %a0) #0 {
+b0:
+  %v0 = add nsw i32 %a0, 3
+  %v1 = tail call i32 bitcast (i32 (...)* @f1 to i32 (i32)*)(i32 %v0) #0
  ret void
 }

-declare i32 @bar_empty(...)
+declare i32 @f1(...) #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/union-1.ll
+++ b/llvm/test/CodeGen/Hexagon/union-1.ll
@ -1,19 +1,21 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: word
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: f0
 ; CHECK-NOT: combine(#0
-; CHECK: jump bar
+; CHECK: jump f1

-define void @word(i32* nocapture %a) nounwind {
-entry:
-  %0 = load i32, i32* %a, align 4
-  %1 = zext i32 %0 to i64
-  %add.ptr = getelementptr inbounds i32, i32* %a, i32 1
-  %2 = load i32, i32* %add.ptr, align 4
-  %3 = zext i32 %2 to i64
-  %4 = shl nuw i64 %3, 32
-  %ins = or i64 %4, %1
-  tail call void @bar(i64 %ins) nounwind
+define void @f0(i32* nocapture %a0) #0 {
+b0:
+  %v0 = load i32, i32* %a0, align 4
+  %v1 = zext i32 %v0 to i64
+  %v2 = getelementptr inbounds i32, i32* %a0, i32 1
+  %v3 = load i32, i32* %v2, align 4
+  %v4 = zext i32 %v3 to i64
+  %v5 = shl nuw i64 %v4, 32
+  %v6 = or i64 %v5, %v1
+  tail call void @f1(i64 %v6) #0
  ret void
 }

-declare void @bar(i64)
+declare void @f1(i64) #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/vaddh.ll
+++ b/llvm/test/CodeGen/Hexagon/vaddh.ll
@ -1,16 +1,19 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
 ; CHECK: vaddh(r{{[0-9]+}},r{{[0-9]+}})

-@j = external global i32
-@k = external global i32
+@g0 = external global i32
+@g1 = external global i32

-define void @foo() nounwind {
-entry:
-  %0 = load i32, i32* @j, align 4
-  %1 = load i32, i32* @k, align 4
-  %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
-  store i32 %2, i32* @k, align 4
+define void @f0() #0 {
+b0:
+  %v0 = load i32, i32* @g0, align 4
+  %v1 = load i32, i32* @g1, align 4
+  %v2 = call i32 @llvm.hexagon.A2.svaddh(i32 %v0, i32 %v1)
+  store i32 %v2, i32* @g1, align 4
  ret void
 }

-declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
+attributes #1 = { nounwind readnone "target-cpu"="hexagonv5" }
--- a/llvm/test/CodeGen/Hexagon/validate-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/validate-offset.ll
@ -1,36 +1,38 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0
+; RUN: llc -march=hexagon -O0 < %s

 ; This is a regression test which makes sure that the offset check
 ; is available for STRiw_indexed instruction. This is required
 ; by 'Hexagon Expand Predicate Spill Code' pass.

-define i32 @f(i32 %a, i32 %b) nounwind {
-entry:
-  %retval = alloca i32, align 4
-  %a.addr = alloca i32, align 4
-  %b.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 %b, i32* %b.addr, align 4
-  %0 = load i32, i32* %a.addr, align 4
-  %1 = load i32, i32* %b.addr, align 4
-  %cmp = icmp sgt i32 %0, %1
-  br i1 %cmp, label %if.then, label %if.else
+define i32 @f0(i32 %a0, i32 %a1) #0 {
+b0:
+  %v0 = alloca i32, align 4
+  %v1 = alloca i32, align 4
+  %v2 = alloca i32, align 4
+  store i32 %a0, i32* %v1, align 4
+  store i32 %a1, i32* %v2, align 4
+  %v3 = load i32, i32* %v1, align 4
+  %v4 = load i32, i32* %v2, align 4
+  %v5 = icmp sgt i32 %v3, %v4
+  br i1 %v5, label %b1, label %b2

-if.then:
-  %2 = load i32, i32* %a.addr, align 4
-  %3 = load i32, i32* %b.addr, align 4
-  %add = add nsw i32 %2, %3
-  store i32 %add, i32* %retval
-  br label %return
+b1:                                               ; preds = %b0
+  %v6 = load i32, i32* %v1, align 4
+  %v7 = load i32, i32* %v2, align 4
+  %v8 = add nsw i32 %v6, %v7
+  store i32 %v8, i32* %v0
+  br label %b3

-if.else:
-  %4 = load i32, i32* %a.addr, align 4
-  %5 = load i32, i32* %b.addr, align 4
-  %sub = sub nsw i32 %4, %5
-  store i32 %sub, i32* %retval
-  br label %return
+b2:                                               ; preds = %b0
+  %v9 = load i32, i32* %v1, align 4
+  %v10 = load i32, i32* %v2, align 4
+  %v11 = sub nsw i32 %v9, %v10
+  store i32 %v11, i32* %v0
+  br label %b3

-return:
-  %6 = load i32, i32* %retval
-  ret i32 %6
+b3:                                               ; preds = %b2, %b1
+  %v12 = load i32, i32* %v0
+  ret i32 %v12
 }
+
+attributes #0 = { nounwind "target-cpu"="hexagonv5" }
--- a/llvm/test/MC/Hexagon/elf-flags.s
+++ b/llvm/test/MC/Hexagon/elf-flags.s
@ -1,10 +1,8 @@
-# RUN: llvm-mc -arch=hexagon -mcpu=hexagonv4 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V4 %s
 # RUN: llvm-mc -arch=hexagon -mcpu=hexagonv5 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V5 %s
 # RUN: llvm-mc -arch=hexagon -mcpu=hexagonv55 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V55 %s
 # RUN: llvm-mc -arch=hexagon -mcpu=hexagonv60 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V60 %s
 # RUN: llvm-mc -arch=hexagon -mcpu=hexagonv62 --filetype=obj %s -o - | llvm-readobj -file-headers -elf-output-style=GNU | FileCheck --check-prefix=CHECK-V62 %s

-# CHECK-V4: Flags: 0x3
 # CHECK-V5: Flags: 0x4
 # CHECK-V55: Flags: 0x5
 # CHECK-V60: Flags: 0x60