[ARM] Ensure we do not attempt to create lsll #0

During legalisation we can end up with some pretty strange nodes, like shifts
of 0. We need to make sure we don't try to make long shifts of these, ending up
with invalid assembly instructions. A long shift with a zero immediate actually
encodes a shift by 32.

Differential Revision: https://reviews.llvm.org/D67664

llvm-svn: 372839
This commit is contained in:
David Green 2019-09-25 10:16:48 +00:00
parent 5b9a408113
commit 10d10102a4
4 changed files with 54 additions and 5 deletions

View File

@@ -6011,7 +6011,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// If the shift amount is greater than 32 or has a greater bitwidth than 64
// then do the default optimisation
if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
(Con && Con->getZExtValue() >= 32))
(Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
return SDValue();
// Extract the lower 32 bits of the shift amount if it's not an i32

View File

@@ -453,16 +453,16 @@ def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo,
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMasrl tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsll tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsll tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
(ARMlsrl tGPREven:$RdaLo_src,
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;

View File

@@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass {
let RenderMethod = "addImmOperands";
let DiagnosticString = "operand must be an immediate in the range [1,32]";
}
def long_shift : Operand<i32> {
def long_shift : Operand<i32>,
ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
let ParserMatchClass = mve_shift_imm;
let DecoderMethod = "DecodeLongShiftOperand";
}

View File

@@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Regression test: during legalisation we can end up with shifts of 0, and we
; must not turn those into a long shift with a zero immediate (e.g. "lsll #0"),
; since that encoding actually means a shift by 32. The expected output below
; contains no long-shift instructions for the zero-amount case.
define void @_Z4loopPxS_iS_i(i64* %d) {
; CHECK-LABEL: _Z4loopPxS_iS_i:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: sxth r2, r2
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: sxth r1, r1
; CHECK-NEXT: sxth r2, r2
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: asrs r3, r2, #31
; CHECK-NEXT: strd r2, r3, [r0]
; CHECK-NEXT: strd r1, r12, [r0, #8]
; CHECK-NEXT: bx lr
entry:
; Load two 64-bit lanes and truncate each to i32.
%wide.load = load <2 x i64>, <2 x i64>* undef, align 8
%0 = trunc <2 x i64> %wide.load to <2 x i32>
; shl 16 followed by "ashr exact" 16 sign-extends the low 16 bits in place.
%1 = shl <2 x i32> %0, <i32 16, i32 16>
%2 = ashr exact <2 x i32> %1, <i32 16, i32 16>
; Variable shift amount masked into [0,7]; per the commit message, after
; legalisation this can become a shift by 0, which must not be lowered to a
; long-shift immediate.
%3 = sub <2 x i32> %2, %0
%4 = and <2 x i32> %3, <i32 7, i32 7>
%5 = shl <2 x i32> %2, %4
; Lane 0: a "select i1 false" always yields its second (undef) operand; the
; truncate/negate/sext chain then stores an i64 through %d.
%6 = extractelement <2 x i32> %5, i32 0
%7 = zext i32 %6 to i64
%8 = select i1 false, i64 %7, i64 undef
%9 = trunc i64 %8 to i16
%10 = sub i16 0, %9
%11 = sext i16 %10 to i64
%12 = getelementptr inbounds i64, i64* %d, i64 undef
store i64 %11, i64* %12, align 8
; Lane 1: same pattern, stored at index 1 (%19 = or 0, 1 constant-folds to 1).
%13 = extractelement <2 x i32> %5, i32 1
%14 = zext i32 %13 to i64
%15 = select i1 false, i64 %14, i64 undef
%16 = trunc i64 %15 to i16
%17 = sub i16 0, %16
%18 = sext i16 %17 to i64
%19 = or i32 0, 1
%20 = sext i32 %19 to i64
%21 = getelementptr inbounds i64, i64* %d, i64 %20
store i64 %18, i64* %21, align 8
ret void
}