forked from OSchip/llvm-project
[ARM] Add support for the MVE long shift instructions
MVE adds the lsll, lsrl and asrl instructions, which perform a shift on a 64-bit value split across two 32-bit registers. The Expand64BitShift function is modified to accept ISD::SHL, ISD::SRL and ISD::SRA and convert each of them into the appropriate ARMISD opcode. An SHL is converted into an lsll; an SRL is converted into an lsrl for the immediate form, or into a negation followed by an lsll for the register form; and an SRA is converted into an asrl. test/CodeGen/ARM/shift_parts.ll is added to test the logic of emitting these instructions. Differential Revision: https://reviews.llvm.org/D63430 llvm-svn: 364654
This commit is contained in:
parent
176b9f6516
commit
e39e958da3
|
@ -932,6 +932,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::SRA, MVT::i64, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
|
||||
|
||||
// MVE lowers 64-bit shifts to lsll, lsrl and asrl
|
||||
// assuming that ISD::SRL and SRA of i64 are already marked custom
|
||||
if (Subtarget->hasMVEIntegerOps())
|
||||
setOperationAction(ISD::SHL, MVT::i64, Custom);
|
||||
|
||||
// Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
|
||||
if (Subtarget->isThumb1Only()) {
|
||||
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
|
||||
|
@ -1411,6 +1416,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case ARMISD::SSAT: return "ARMISD::SSAT";
|
||||
case ARMISD::USAT: return "ARMISD::USAT";
|
||||
|
||||
case ARMISD::ASRL: return "ARMISD::ASRL";
|
||||
case ARMISD::LSRL: return "ARMISD::LSRL";
|
||||
case ARMISD::LSLL: return "ARMISD::LSLL";
|
||||
|
||||
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
|
||||
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
|
||||
case ARMISD::RRX: return "ARMISD::RRX";
|
||||
|
@ -5619,11 +5628,54 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
|
|||
if (VT != MVT::i64)
|
||||
return SDValue();
|
||||
|
||||
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
|
||||
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
|
||||
N->getOpcode() == ISD::SHL) &&
|
||||
"Unknown shift to lower!");
|
||||
|
||||
unsigned ShOpc = N->getOpcode();
|
||||
if (ST->hasMVEIntegerOps()) {
|
||||
SDValue ShAmt = N->getOperand(1);
|
||||
unsigned ShPartsOpc = ARMISD::LSLL;
|
||||
ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
|
||||
|
||||
// If the shift amount is greater than 32 then do the default optimisation
|
||||
if (Con && Con->getZExtValue() > 32)
|
||||
return SDValue();
|
||||
|
||||
// Extract the lower 32 bits of the shift amount if it's an i64
|
||||
if (ShAmt->getValueType(0) == MVT::i64)
|
||||
ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
|
||||
DAG.getConstant(0, dl, MVT::i32));
|
||||
|
||||
if (ShOpc == ISD::SRL) {
|
||||
if (!Con)
|
||||
// There is no t2LSRLr instruction so negate and perform an lsll if the
|
||||
// shift amount is in a register, emulating a right shift.
|
||||
ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||
DAG.getConstant(0, dl, MVT::i32), ShAmt);
|
||||
else
|
||||
// Else generate an lsrl on the immediate shift amount
|
||||
ShPartsOpc = ARMISD::LSRL;
|
||||
} else if (ShOpc == ISD::SRA)
|
||||
ShPartsOpc = ARMISD::ASRL;
|
||||
|
||||
// Lower 32 bits of the destination/source
|
||||
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
|
||||
DAG.getConstant(0, dl, MVT::i32));
|
||||
// Upper 32 bits of the destination/source
|
||||
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
|
||||
DAG.getConstant(1, dl, MVT::i32));
|
||||
|
||||
// Generate the shift operation as computed above
|
||||
Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
|
||||
ShAmt);
|
||||
// The upper 32 bits come from the second result of the long shift node
|
||||
Hi = SDValue(Lo.getNode(), 1);
|
||||
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
|
||||
}
|
||||
|
||||
// We only lower SRA, SRL of 1 here, all others use generic lowering.
|
||||
if (!isOneConstant(N->getOperand(1)))
|
||||
if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
|
||||
return SDValue();
|
||||
|
||||
// If we are in thumb mode, we don't have RRX.
|
||||
|
@ -8291,6 +8343,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
break;
|
||||
case ISD::SRL:
|
||||
case ISD::SRA:
|
||||
case ISD::SHL:
|
||||
Res = Expand64BitShift(N, DAG, Subtarget);
|
||||
break;
|
||||
case ISD::SREM:
|
||||
|
|
|
@ -76,6 +76,10 @@ class VectorType;
|
|||
|
||||
PIC_ADD, // Add with a PC operand and a PIC label.
|
||||
|
||||
ASRL, // MVE long arithmetic shift right.
|
||||
LSRL, // MVE long shift right.
|
||||
LSLL, // MVE long shift left.
|
||||
|
||||
CMP, // ARM compare instructions.
|
||||
CMN, // ARM CMN instructions.
|
||||
CMPZ, // ARM compare that sets only Z flag.
|
||||
|
|
|
@ -99,6 +99,13 @@ def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
|
|||
SDTCisSameAs<0, 4>,
|
||||
SDTCisSameAs<0, 5>]>;
|
||||
|
||||
// ARMlsll, ARMlsrl, ARMasrl
|
||||
def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisInt<0>,
|
||||
SDTCisInt<4>]>;
|
||||
|
||||
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
|
||||
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
|
||||
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
|
||||
|
@ -171,6 +178,10 @@ def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
|
|||
|
||||
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
|
||||
|
||||
def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
|
||||
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
|
||||
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
|
||||
|
||||
def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
|
||||
def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
|
||||
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
|
||||
|
|
|
@ -427,11 +427,21 @@ class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
|
|||
let DecoderMethod = "DecodeMVEOverlappingLongShift";
|
||||
}
|
||||
|
||||
def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0>;
|
||||
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?>;
|
||||
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0>;
|
||||
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?>;
|
||||
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?>;
|
||||
def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||
(ARMasrl tGPREven:$RdaLo_src,
|
||||
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
||||
def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||
(ARMasrl tGPREven:$RdaLo_src,
|
||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
||||
def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||
(ARMlsll tGPREven:$RdaLo_src,
|
||||
tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
|
||||
def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||
(ARMlsll tGPREven:$RdaLo_src,
|
||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
||||
def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
|
||||
(ARMlsrl tGPREven:$RdaLo_src,
|
||||
tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
|
||||
|
||||
def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>;
|
||||
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
|
||||
|
|
|
@ -0,0 +1,221 @@
|
|||
; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-MVE
|
||||
; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1-m.main-none-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NON-MVE
|
||||
|
||||
define i64 @shift_left_reg(i64 %x, i64 %y) {
|
||||
; CHECK-MVE-LABEL: shift_left_reg:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: lsll r0, r1, r2
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_left_reg:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: .save {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: bl __aeabi_llsl
|
||||
; CHECK-NON-MVE-NEXT: pop {r7}
|
||||
; CHECK-NON-MVE-NEXT: pop {r2}
|
||||
; CHECK-NON-MVE-NEXT: bx r2
|
||||
entry:
|
||||
%shl = shl i64 %x, %y
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shift_left_imm(i64 %x) {
|
||||
; CHECK-MVE-LABEL: shift_left_imm:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: lsll r0, r1, #3
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_left_imm:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: lsrs r2, r0, #29
|
||||
; CHECK-NON-MVE-NEXT: lsls r1, r1, #3
|
||||
; CHECK-NON-MVE-NEXT: adds r1, r1, r2
|
||||
; CHECK-NON-MVE-NEXT: lsls r0, r0, #3
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl i64 %x, 3
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shift_left_imm_big(i64 %x) {
|
||||
; CHECK-LABEL: shift_left_imm_big:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: lsls r1, r0, #16
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl i64 %x, 48
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shift_left_imm_big2(i64 %x) {
|
||||
; CHECK-MVE-LABEL: shift_left_imm_big2:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: lsll r0, r1, #32
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_left_imm_big2:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: movs r1, r0
|
||||
; CHECK-NON-MVE-NEXT: movs r0, #0
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl i64 %x, 32
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shift_left_imm_big3(i64 %x) {
|
||||
; CHECK-LABEL: shift_left_imm_big3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: lsls r1, r0, #1
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shl = shl i64 %x, 33
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shift_right_reg(i64 %x, i64 %y) {
|
||||
; CHECK-MVE-LABEL: shift_right_reg:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: rsbs r2, r2, #0
|
||||
; CHECK-MVE-NEXT: lsll r0, r1, r2
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_right_reg:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: .save {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: bl __aeabi_llsr
|
||||
; CHECK-NON-MVE-NEXT: pop {r7}
|
||||
; CHECK-NON-MVE-NEXT: pop {r2}
|
||||
; CHECK-NON-MVE-NEXT: bx r2
|
||||
entry:
|
||||
%shr = lshr i64 %x, %y
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_right_imm(i64 %x) {
|
||||
; CHECK-MVE-LABEL: shift_right_imm:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: lsrl r0, r1, #3
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_right_imm:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: lsls r2, r1, #29
|
||||
; CHECK-NON-MVE-NEXT: lsrs r0, r0, #3
|
||||
; CHECK-NON-MVE-NEXT: adds r0, r0, r2
|
||||
; CHECK-NON-MVE-NEXT: lsrs r1, r1, #3
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%shr = lshr i64 %x, 3
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_right_imm_big(i64 %x) {
|
||||
; CHECK-LABEL: shift_right_imm_big:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: lsrs r0, r1, #16
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shr = lshr i64 %x, 48
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_right_imm_big2(i64 %x) {
|
||||
; CHECK-MVE-LABEL: shift_right_imm_big2:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: lsrl r0, r1, #32
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_right_imm_big2:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: movs r0, r1
|
||||
; CHECK-NON-MVE-NEXT: movs r1, #0
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%shr = lshr i64 %x, 32
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_right_imm_big3(i64 %x) {
|
||||
; CHECK-LABEL: shift_right_imm_big3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: lsrs r0, r1, #1
|
||||
; CHECK-NEXT: movs r1, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%shr = lshr i64 %x, 33
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_arithmetic_right_reg(i64 %x, i64 %y) {
|
||||
; CHECK-MVE-LABEL: shift_arithmetic_right_reg:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: asrl r0, r1, r2
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_arithmetic_right_reg:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: .save {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: push {r7, lr}
|
||||
; CHECK-NON-MVE-NEXT: bl __aeabi_lasr
|
||||
; CHECK-NON-MVE-NEXT: pop {r7}
|
||||
; CHECK-NON-MVE-NEXT: pop {r2}
|
||||
; CHECK-NON-MVE-NEXT: bx r2
|
||||
entry:
|
||||
%shr = ashr i64 %x, %y
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
define i64 @shift_arithmetic_right_imm(i64 %x) {
|
||||
; CHECK-MVE-LABEL: shift_arithmetic_right_imm:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: asrl r0, r1, #3
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: shift_arithmetic_right_imm:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: lsls r2, r1, #29
|
||||
; CHECK-NON-MVE-NEXT: lsrs r0, r0, #3
|
||||
; CHECK-NON-MVE-NEXT: adds r0, r0, r2
|
||||
; CHECK-NON-MVE-NEXT: asrs r1, r1, #3
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%shr = ashr i64 %x, 3
|
||||
ret i64 %shr
|
||||
}
|
||||
|
||||
%struct.bar = type { i16, i8, [5 x i8] }
|
||||
|
||||
define arm_aapcs_vfpcc void @fn1(%struct.bar* nocapture %a) {
|
||||
; CHECK-MVE-LABEL: fn1:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: ldr r2, [r0, #4]
|
||||
; CHECK-MVE-NEXT: movs r1, #0
|
||||
; CHECK-MVE-NEXT: lsll r2, r1, #8
|
||||
; CHECK-MVE-NEXT: strb r1, [r0, #7]
|
||||
; CHECK-MVE-NEXT: str.w r2, [r0, #3]
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-NON-MVE-LABEL: fn1:
|
||||
; CHECK-NON-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-NON-MVE-NEXT: ldr r1, [r0, #4]
|
||||
; CHECK-NON-MVE-NEXT: lsls r2, r1, #8
|
||||
; CHECK-NON-MVE-NEXT: movs r3, #3
|
||||
; CHECK-NON-MVE-NEXT: str r2, [r0, r3]
|
||||
; CHECK-NON-MVE-NEXT: adds r0, r0, #3
|
||||
; CHECK-NON-MVE-NEXT: lsrs r1, r1, #24
|
||||
; CHECK-NON-MVE-NEXT: strb r1, [r0, #4]
|
||||
; CHECK-NON-MVE-NEXT: bx lr
|
||||
entry:
|
||||
%carey = getelementptr inbounds %struct.bar, %struct.bar* %a, i32 0, i32 2
|
||||
%0 = bitcast [5 x i8]* %carey to i40*
|
||||
%bf.load = load i40, i40* %0, align 1
|
||||
%bf.clear = and i40 %bf.load, -256
|
||||
store i40 %bf.clear, i40* %0, align 1
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue