[AArch64][GlobalISel] Port some AArch64 target specific MUL combines from SDAG.

These do things like turn a multiply of a pow-2+1 into a shift and add, which is a common pattern that pops up, and is universally better than expensive madd instructions with a constant. I've added check lines to an existing codegen test since the code being ported is almost identical; however, the mul by negative pow2 constant tests don't generate the same code because we're still missing some generic G_MUL combines. Differential Revision: https://reviews.llvm.org/D91125
2020-11-09 21:55:22 -08:00 · 2020-11-09 21:55:22 -08:00 · 2262393090
parent 881b4d20f6
commit 2262393090
3 changed files with 587 additions and 106 deletions
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@ -111,6 +111,14 @@ def extractvecelt_pairwise_add : GICombineRule<
  (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
 >;

+def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
+def mul_const : GICombineRule< // G_MUL by constant; see matchAArch64MulConstCombine.
+  (defs root:$root, mul_const_matchdata:$matchinfo), // $matchinfo: callback set by match, run by apply.
+  (match (wip_match_opcode G_MUL):$root,
+          [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@ -128,6 +136,7 @@ def AArch64PostLegalizerCombinerHelper
                        sext_trunc_sextload,
                        hoist_logic_op_with_same_opcode_hands,
                        redundant_and, xor_of_and_with_same_reg,
-                        extractvecelt_pairwise_add, redundant_or]> {
+                        extractvecelt_pairwise_add, redundant_or,
+                        mul_const]> {
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@ -28,6 +28,7 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@ -104,6 +105,138 @@ bool applyExtractVecEltPairwiseAdd(
  return true;
 }

+static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
+  // Looks at the def's opcode only. TODO: also check extended build vectors.
+  unsigned Opc = MRI.getVRegDef(R)->getOpcode();
+  return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
+}
+
+static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
+  // Only a G_ZEXT def counts. TODO: also check extended build vectors.
+  return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
+}
+
+bool matchAArch64MulConstCombine(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+  assert(MI.getOpcode() == TargetOpcode::G_MUL);
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(LHS);
+
+  // The optimizations below require a constant RHS.
+  auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!Const)
+    return false;
+
+  const APInt &ConstValue = APInt(Ty.getSizeInBits(), Const->Value, true);
+  // The following code is ported from AArch64ISelLowering.
+  // Multiplication of a power of two plus/minus one can be done more
+  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
+  // future CPUs have a cheaper MADD instruction, this may need to be
+  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+  // 64-bit is 5 cycles, so this is always a win.
+  // More aggressively, some multiplications N0 * C can be lowered to
+  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
+  // e.g. 6=3*2=(2+1)*2.
+  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
+  // which equals (1+2)*16-(1+2).
+  // TrailingZeroes is used to test if the mul can be lowered to
+  // shift+add+shift. NOTE(review): C == 0 gives the bit width here - handled?
+  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+  if (TrailingZeroes) {
+    // Conservatively do not lower to shift+add+shift if the mul might be
+    // folded into smul or umul.
+    if (MRI.hasOneNonDBGUse(LHS) &&
+        (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
+      return false;
+    // Conservatively do not lower to shift+add+shift if the mul might be
+    // folded into madd or msub.
+    if (MRI.hasOneNonDBGUse(Dst)) {
+      MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
+      if (UseMI.getOpcode() == TargetOpcode::G_ADD ||
+          UseMI.getOpcode() == TargetOpcode::G_SUB)
+        return false;
+    }
+  }
+  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
+  // and shift+add+shift.
+  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
+
+  unsigned ShiftAmt, AddSubOpc;
+  // Is the shifted value the LHS operand of the add/sub?
+  bool ShiftValUseIsLHS = true;
+  // Do we need to negate the result?
+  bool NegateResult = false;
+
+  if (ConstValue.isNonNegative()) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
+    APInt SCVMinus1 = ShiftedConstValue - 1;
+    APInt CVPlus1 = ConstValue + 1;
+    if (SCVMinus1.isPowerOf2()) {
+      ShiftAmt = SCVMinus1.logBase2();
+      AddSubOpc = TargetOpcode::G_ADD;
+    } else if (CVPlus1.isPowerOf2()) {
+      ShiftAmt = CVPlus1.logBase2();
+      AddSubOpc = TargetOpcode::G_SUB;
+    } else
+      return false;
+  } else {
+    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+    APInt CVNegPlus1 = -ConstValue + 1;
+    APInt CVNegMinus1 = -ConstValue - 1;
+    if (CVNegPlus1.isPowerOf2()) {
+      ShiftAmt = CVNegPlus1.logBase2();
+      AddSubOpc = TargetOpcode::G_SUB;
+      ShiftValUseIsLHS = false;
+    } else if (CVNegMinus1.isPowerOf2()) {
+      ShiftAmt = CVNegMinus1.logBase2();
+      AddSubOpc = TargetOpcode::G_ADD;
+      NegateResult = true;
+    } else
+      return false;
+  }
+
+  if (NegateResult && TrailingZeroes)
+    return false; // Negating a shifted result is not supported for now.
+
+  ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
+    auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
+    auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
+
+    Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
+    Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
+    auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
+    assert(!(NegateResult && TrailingZeroes) &&
+           "NegateResult and TrailingZeroes cannot both be true for now.");
+    // Negate the result: DstReg = 0 - Res.
+    if (NegateResult) {
+      B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
+      return;
+    }
+    // Shift the result left by the constant's trailing zero count.
+    if (TrailingZeroes) {
+      B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
+      return;
+    }
+    B.buildCopy(DstReg, Res.getReg(0));
+  };
+  return true;
+}
+
+bool applyAArch64MulConstCombine(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+  B.setInstrAndDebugLoc(MI); // Position the builder at the G_MUL.
+  ApplyFn(B, MI.getOperand(0).getReg()); // Callback writes the mul's dst reg.
+  MI.eraseFromParent(); // Remove the G_MUL once its dst has a new def.
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL

 ; Convert mul x, pow2 to shift.
 ; Convert mul x, pow2 +/- 1 to shift + add/sub.
@ -6,32 +8,60 @@
 ; Lowering other positive constants are not supported yet.

 define i32 @test2(i32 %x) {
-; CHECK-LABEL: test2
-; CHECK: lsl w0, w0, #1
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #1
+; GISEL-NEXT:    ret

  %mul = shl nsw i32 %x, 1
  ret i32 %mul
 }

 define i32 @test3(i32 %x) {
-; CHECK-LABEL: test3
-; CHECK: add w0, w0, w0, lsl #1
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #1
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 3
  ret i32 %mul
 }

 define i32 @test4(i32 %x) {
-; CHECK-LABEL: test4
-; CHECK: lsl w0, w0, #2
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #2
+; GISEL-NEXT:    ret

  %mul = shl nsw i32 %x, 2
  ret i32 %mul
 }

 define i32 @test5(i32 %x) {
-; CHECK-LABEL: test5
-; CHECK: add w0, w0, w0, lsl #2
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #2
+; GISEL-NEXT:    ret


  %mul = mul nsw i32 %x, 5
@ -39,200 +69,386 @@ define i32 @test5(i32 %x) {
 }

 define i32 @test6_32b(i32 %x) {
-; CHECK-LABEL: test6
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
-; CHECK: lsl w0, {{w[0-9]+}}, #1
+; CHECK-LABEL: test6_32b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #1
+; CHECK-NEXT:    lsl w0, w8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_32b:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #1
+; GISEL-NEXT:    lsl w0, w8, #1
+; GISEL-NEXT:    ret

-  %mul = mul nsw i32 %x, 6 
+  %mul = mul nsw i32 %x, 6
  ret i32 %mul
 }

 define i64 @test6_64b(i64 %x) {
-; CHECK-LABEL: test6_64b
-; CHECK: add {{x[0-9]+}}, x0, x0, lsl #1
-; CHECK: lsl x0, {{x[0-9]+}}, #1
+; CHECK-LABEL: test6_64b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0, lsl #1
+; CHECK-NEXT:    lsl x0, x8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_64b:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add x8, x0, x0, lsl #1
+; GISEL-NEXT:    lsl x0, x8, #1
+; GISEL-NEXT:    ret

-  %mul = mul nsw i64 %x, 6 
+  %mul = mul nsw i64 %x, 6
  ret i64 %mul
 }

-; mul that appears together with add, sub, s(z)ext is not supported to be 
+; mul that appears together with add, sub, s(z)ext is not supported to be
 ; converted to the combination of lsl, add/sub yet.
 define i64 @test6_umull(i32 %x) {
-; CHECK-LABEL: test6_umull
-; CHECK: umull x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_umull:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umull x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umull:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umull x0, w0, w8
+; GISEL-NEXT:    ret

  %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  ret i64 %mul
 }

 define i64 @test6_smull(i32 %x) {
-; CHECK-LABEL: test6_smull
-; CHECK: smull x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_smull:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smull x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smull:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smull x0, w0, w8
+; GISEL-NEXT:    ret

  %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  ret i64 %mul
 }

 define i32 @test6_madd(i32 %x, i32 %y) {
-; CHECK-LABEL: test6_madd
-; CHECK: madd w0, w0, {{w[0-9]+}}, w1 
+; CHECK-LABEL: test6_madd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    madd w0, w0, w8, w1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_madd:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    madd w0, w0, w8, w1
+; GISEL-NEXT:    ret

-  %mul = mul nsw i32 %x, 6 
+  %mul = mul nsw i32 %x, 6
  %add = add i32 %mul, %y
  ret i32 %add
 }

 define i32 @test6_msub(i32 %x, i32 %y) {
-; CHECK-LABEL: test6_msub
-; CHECK: msub w0, w0, {{w[0-9]+}}, w1 
+; CHECK-LABEL: test6_msub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    msub w0, w0, w8, w1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_msub:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    msub w0, w0, w8, w1
+; GISEL-NEXT:    ret

-  %mul = mul nsw i32 %x, 6 
+  %mul = mul nsw i32 %x, 6
  %sub = sub i32 %y, %mul
  ret i32 %sub
 }

 define i64 @test6_umaddl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_umaddl
-; CHECK: umaddl x0, w0, {{w[0-9]+}}, x1 
+; CHECK-LABEL: test6_umaddl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umaddl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umaddl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umaddl x0, w0, w8, x1
+; GISEL-NEXT:    ret

  %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %add = add i64 %mul, %y
  ret i64 %add
 }

 define i64 @test6_smaddl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_smaddl
-; CHECK: smaddl x0, w0, {{w[0-9]+}}, x1
+; CHECK-LABEL: test6_smaddl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smaddl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smaddl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smaddl x0, w0, w8, x1
+; GISEL-NEXT:    ret

  %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %add = add i64 %mul, %y
  ret i64 %add
 }

 define i64 @test6_umsubl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_umsubl
-; CHECK: umsubl x0, w0, {{w[0-9]+}}, x1
+; CHECK-LABEL: test6_umsubl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umsubl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umsubl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umsubl x0, w0, w8, x1
+; GISEL-NEXT:    ret

  %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %sub = sub i64 %y, %mul
  ret i64 %sub
 }

 define i64 @test6_smsubl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_smsubl
-; CHECK: smsubl x0, w0, {{w[0-9]+}}, x1 
+; CHECK-LABEL: test6_smsubl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smsubl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smsubl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smsubl x0, w0, w8, x1
+; GISEL-NEXT:    ret

  %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %sub = sub i64 %y, %mul
  ret i64 %sub
 }

 define i64 @test6_umnegl(i32 %x) {
-; CHECK-LABEL: test6_umnegl
-; CHECK: umnegl x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_umnegl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umnegl x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umnegl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umnegl x0, w0, w8
+; GISEL-NEXT:    ret

  %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %sub = sub i64 0, %mul
  ret i64 %sub
 }

 define i64 @test6_smnegl(i32 %x) {
-; CHECK-LABEL: test6_smnegl
-; CHECK: smnegl x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_smnegl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smnegl x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smnegl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smnegl x0, w0, w8
+; GISEL-NEXT:    ret

  %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
  %sub = sub i64 0, %mul
  ret i64 %sub
 }

 define i32 @test7(i32 %x) {
-; CHECK-LABEL: test7
-; CHECK: lsl {{w[0-9]+}}, w0, #3
-; CHECK: sub w0, {{w[0-9]+}}, w0
+; CHECK-LABEL: test7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #3
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test7:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w8, w0, #3
+; GISEL-NEXT:    sub w0, w8, w0
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 7
  ret i32 %mul
 }

 define i32 @test8(i32 %x) {
-; CHECK-LABEL: test8
-; CHECK: lsl w0, w0, #3
+; CHECK-LABEL: test8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #3
+; GISEL-NEXT:    ret

  %mul = shl nsw i32 %x, 3
  ret i32 %mul
 }

 define i32 @test9(i32 %x) {
-; CHECK-LABEL: test9
-; CHECK: add w0, w0, w0, lsl #3
+; CHECK-LABEL: test9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test9:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #3
+; GISEL-NEXT:    ret

-  %mul = mul nsw i32 %x, 9 
+  %mul = mul nsw i32 %x, 9
  ret i32 %mul
 }

 define i32 @test10(i32 %x) {
-; CHECK-LABEL: test10
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
-; CHECK: lsl w0, {{w[0-9]+}}, #1
+; CHECK-LABEL: test10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #2
+; CHECK-NEXT:    lsl w0, w8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test10:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #2
+; GISEL-NEXT:    lsl w0, w8, #1
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 10
  ret i32 %mul
 }

 define i32 @test11(i32 %x) {
-; CHECK-LABEL: test11
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: test11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #11
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test11:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #11
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 11
  ret i32 %mul
 }

 define i32 @test12(i32 %x) {
-; CHECK-LABEL: test12
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
-; CHECK: lsl w0, {{w[0-9]+}}, #2
+; CHECK-LABEL: test12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #1
+; CHECK-NEXT:    lsl w0, w8, #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test12:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #1
+; GISEL-NEXT:    lsl w0, w8, #2
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 12
  ret i32 %mul
 }

 define i32 @test13(i32 %x) {
-; CHECK-LABEL: test13
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: test13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #13
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test13:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #13
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 13
  ret i32 %mul
 }

 define i32 @test14(i32 %x) {
-; CHECK-LABEL: test14
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: test14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #14
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test14:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #14
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

-  %mul = mul nsw i32 %x, 14 
+  %mul = mul nsw i32 %x, 14
  ret i32 %mul
 }

 define i32 @test15(i32 %x) {
-; CHECK-LABEL: test15
-; CHECK: lsl {{w[0-9]+}}, w0, #4
-; CHECK: sub w0, {{w[0-9]+}}, w0
+; CHECK-LABEL: test15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #4
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test15:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w8, w0, #4
+; GISEL-NEXT:    sub w0, w8, w0
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 15
  ret i32 %mul
 }

 define i32 @test16(i32 %x) {
-; CHECK-LABEL: test16
-; CHECK: lsl w0, w0, #4
+; CHECK-LABEL: test16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #4
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, 16
  ret i32 %mul
@ -243,120 +459,243 @@ define i32 @test16(i32 %x) {
 ; Lowering other negative constants are not supported yet.

 define i32 @ntest2(i32 %x) {
-; CHECK-LABEL: ntest2
-; CHECK: neg w0, w0, lsl #1
+; CHECK-LABEL: ntest2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-2
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -2
  ret i32 %mul
 }

 define i32 @ntest3(i32 %x) {
-; CHECK-LABEL: ntest3
-; CHECK: sub w0, w0, w0, lsl #2
+; CHECK-LABEL: ntest3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #2
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -3
  ret i32 %mul
 }

 define i32 @ntest4(i32 %x) {
-; CHECK-LABEL: ntest4
-; CHECK:neg w0, w0, lsl #2
+; CHECK-LABEL: ntest4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-4
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -4
  ret i32 %mul
 }

 define i32 @ntest5(i32 %x) {
-; CHECK-LABEL: ntest5
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
-; CHECK: neg w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #2
+; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #2
+; GISEL-NEXT:    neg w0, w8
+; GISEL-NEXT:    ret
  %mul = mul nsw i32 %x, -5
  ret i32 %mul
 }

 define i32 @ntest6(i32 %x) {
-; CHECK-LABEL: ntest6
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-6
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest6:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-6
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -6
  ret i32 %mul
 }

 define i32 @ntest7(i32 %x) {
-; CHECK-LABEL: ntest7
-; CHECK: sub w0, w0, w0, lsl #3
+; CHECK-LABEL: ntest7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest7:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #3
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -7
  ret i32 %mul
 }

 define i32 @ntest8(i32 %x) {
-; CHECK-LABEL: ntest8
-; CHECK: neg w0, w0, lsl #3
+; CHECK-LABEL: ntest8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-8
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -8
  ret i32 %mul
 }

 define i32 @ntest9(i32 %x) {
-; CHECK-LABEL: ntest9
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #3
-; CHECK: neg w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #3
+; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest9:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #3
+; GISEL-NEXT:    neg w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -9
  ret i32 %mul
 }

 define i32 @ntest10(i32 %x) {
-; CHECK-LABEL: ntest10
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-10
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest10:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-10
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -10
  ret i32 %mul
 }

 define i32 @ntest11(i32 %x) {
-; CHECK-LABEL: ntest11
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-11
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest11:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-11
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -11
  ret i32 %mul
 }

 define i32 @ntest12(i32 %x) {
-; CHECK-LABEL: ntest12
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-12
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest12:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-12
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -12
  ret i32 %mul
 }

 define i32 @ntest13(i32 %x) {
-; CHECK-LABEL: ntest13
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-13
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest13:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-13
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
  %mul = mul nsw i32 %x, -13
  ret i32 %mul
 }

 define i32 @ntest14(i32 %x) {
-; CHECK-LABEL: ntest14
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-14
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest14:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-14
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -14
  ret i32 %mul
 }

 define i32 @ntest15(i32 %x) {
-; CHECK-LABEL: ntest15
-; CHECK: sub w0, w0, w0, lsl #4
+; CHECK-LABEL: ntest15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest15:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #4
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -15
  ret i32 %mul
 }

 define i32 @ntest16(i32 %x) {
-; CHECK-LABEL: ntest16
-; CHECK: neg w0, w0, lsl #4
+; CHECK-LABEL: ntest16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-16
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret

  %mul = mul nsw i32 %x, -16
  ret i32 %mul