forked from OSchip/llvm-project
[ARM] Ensure we do not attempt to create lsll #0
During legalisation we can end up with some pretty strange nodes, like shifts by 0. We need to make sure we don't try to turn these into long shifts, which would produce invalid assembly instructions: a long shift with a zero immediate actually encodes a shift by 32. Differential Revision: https://reviews.llvm.org/D67664 llvm-svn: 372839
This commit is contained in:
parent
5b9a408113
commit
10d10102a4
|
@ -6011,7 +6011,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
|
||||||
// If the shift amount is greater than 32 or has a greater bitwidth than 64
|
// If the shift amount is greater than 32 or has a greater bitwidth than 64
|
||||||
// then do the default optimisation
|
// then do the default optimisation
|
||||||
if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
|
if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
|
||||||
(Con && Con->getZExtValue() >= 32))
|
(Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Extract the lower 32 bits of the shift amount if it's not an i32
|
// Extract the lower 32 bits of the shift amount if it's not an i32
|
||||||
|
|
|
@ -453,16 +453,16 @@ def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo,
|
||||||
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
||||||
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||||
(ARMasrl tGPREven:$RdaLo_src,
|
(ARMasrl tGPREven:$RdaLo_src,
|
||||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
|
||||||
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||||
(ARMlsll tGPREven:$RdaLo_src,
|
(ARMlsll tGPREven:$RdaLo_src,
|
||||||
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
||||||
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||||
(ARMlsll tGPREven:$RdaLo_src,
|
(ARMlsll tGPREven:$RdaLo_src,
|
||||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
|
||||||
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||||
(ARMlsrl tGPREven:$RdaLo_src,
|
(ARMlsrl tGPREven:$RdaLo_src,
|
||||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>;
|
||||||
|
|
||||||
def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
|
def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>;
|
||||||
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
|
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
|
||||||
|
|
|
@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass {
|
||||||
let RenderMethod = "addImmOperands";
|
let RenderMethod = "addImmOperands";
|
||||||
let DiagnosticString = "operand must be an immediate in the range [1,32]";
|
let DiagnosticString = "operand must be an immediate in the range [1,32]";
|
||||||
}
|
}
|
||||||
def long_shift : Operand<i32> {
|
def long_shift : Operand<i32>,
|
||||||
|
ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
|
||||||
let ParserMatchClass = mve_shift_imm;
|
let ParserMatchClass = mve_shift_imm;
|
||||||
let DecoderMethod = "DecodeLongShiftOperand";
|
let DecoderMethod = "DecodeLongShiftOperand";
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
define void @_Z4loopPxS_iS_i(i64* %d) {
|
||||||
|
; CHECK-LABEL: _Z4loopPxS_iS_i:
|
||||||
|
; CHECK: @ %bb.0: @ %entry
|
||||||
|
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
||||||
|
; CHECK-NEXT: vmov r1, s2
|
||||||
|
; CHECK-NEXT: vmov r2, s0
|
||||||
|
; CHECK-NEXT: sxth r1, r1
|
||||||
|
; CHECK-NEXT: sxth r2, r2
|
||||||
|
; CHECK-NEXT: rsbs r1, r1, #0
|
||||||
|
; CHECK-NEXT: rsbs r2, r2, #0
|
||||||
|
; CHECK-NEXT: sxth r1, r1
|
||||||
|
; CHECK-NEXT: sxth r2, r2
|
||||||
|
; CHECK-NEXT: asr.w r12, r1, #31
|
||||||
|
; CHECK-NEXT: asrs r3, r2, #31
|
||||||
|
; CHECK-NEXT: strd r2, r3, [r0]
|
||||||
|
; CHECK-NEXT: strd r1, r12, [r0, #8]
|
||||||
|
; CHECK-NEXT: bx lr
|
||||||
|
entry:
|
||||||
|
%wide.load = load <2 x i64>, <2 x i64>* undef, align 8
|
||||||
|
%0 = trunc <2 x i64> %wide.load to <2 x i32>
|
||||||
|
%1 = shl <2 x i32> %0, <i32 16, i32 16>
|
||||||
|
%2 = ashr exact <2 x i32> %1, <i32 16, i32 16>
|
||||||
|
%3 = sub <2 x i32> %2, %0
|
||||||
|
%4 = and <2 x i32> %3, <i32 7, i32 7>
|
||||||
|
%5 = shl <2 x i32> %2, %4
|
||||||
|
%6 = extractelement <2 x i32> %5, i32 0
|
||||||
|
%7 = zext i32 %6 to i64
|
||||||
|
%8 = select i1 false, i64 %7, i64 undef
|
||||||
|
%9 = trunc i64 %8 to i16
|
||||||
|
%10 = sub i16 0, %9
|
||||||
|
%11 = sext i16 %10 to i64
|
||||||
|
%12 = getelementptr inbounds i64, i64* %d, i64 undef
|
||||||
|
store i64 %11, i64* %12, align 8
|
||||||
|
%13 = extractelement <2 x i32> %5, i32 1
|
||||||
|
%14 = zext i32 %13 to i64
|
||||||
|
%15 = select i1 false, i64 %14, i64 undef
|
||||||
|
%16 = trunc i64 %15 to i16
|
||||||
|
%17 = sub i16 0, %16
|
||||||
|
%18 = sext i16 %17 to i64
|
||||||
|
%19 = or i32 0, 1
|
||||||
|
%20 = sext i32 %19 to i64
|
||||||
|
%21 = getelementptr inbounds i64, i64* %d, i64 %20
|
||||||
|
store i64 %18, i64* %21, align 8
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue