forked from OSchip/llvm-project
[ARM] Enable SMLAL[B|T] isel
Enable the selection of the 64-bit signed multiply accumulate instructions which operate on 16-bit operands. These are enabled for ARMv5TE onwards for ARM and for V6T2 and other DSP enabled Thumb architectures. Differential Revision: https://reviews.llvm.org/D30044 llvm-svn: 297809
This commit is contained in:
parent
1b192336d8
commit
654cb8263a
|
@ -1337,6 +1337,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case ARMISD::UMAAL: return "ARMISD::UMAAL";
|
||||
case ARMISD::UMLAL: return "ARMISD::UMLAL";
|
||||
case ARMISD::SMLAL: return "ARMISD::SMLAL";
|
||||
case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
|
||||
case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
|
||||
case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
|
||||
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
|
||||
case ARMISD::SMULWB: return "ARMISD::SMULWB";
|
||||
case ARMISD::SMULWT: return "ARMISD::SMULWT";
|
||||
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
|
||||
|
@ -9497,8 +9501,90 @@ static SDValue findMUL_LOHI(SDValue V) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to fold a glued ADDC/ADDE pair that accumulates the sign-extended
/// 32-bit product of two 16-bit values into a 64-bit value, producing one of
/// ARMISD::SMLALBB, SMLALBT, SMLALTB or SMLALTT.
///
/// The shape being matched is:
///   lo = addc (mul a, b), acc_lo
///   hi = adde (sra (mul a, b), 31), acc_hi
/// where each multiplicand is either a value accepted by isS16 (bottom half)
/// or an operand accepted by isSRA16 (top half). The add operands may appear
/// in either order.
///
/// \param AddcNode  The ADDC node; its value result is the low word.
/// \param AddeNode  The ADDE node; its value result is the high word.
/// \param DCI       Combiner state, used to reach the SelectionDAG.
/// \param Subtarget Used to check that the instructions are available.
/// \returns SDValue(AddcNode, 0) once the uses have been rewritten, or an
///          empty SDValue if the pattern does not match.
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const ARMSubtarget *Subtarget) {
  // Thumb needs the DSP extension; ARM mode needs ARMv5TE.
  if (Subtarget->isThumb()) {
    if (!Subtarget->hasDSP())
      return SDValue();
  } else if (!Subtarget->hasV5TEOps())
    return SDValue();

  // The ADDC combines the product with the LOW word of the accumulator.
  // Accept the multiply as either operand.
  SDValue Mul = AddcNode->getOperand(0);
  SDValue Lo = AddcNode->getOperand(1);
  if (Mul.getOpcode() != ISD::MUL) {
    Lo = AddcNode->getOperand(0);
    Mul = AddcNode->getOperand(1);
    if (Mul.getOpcode() != ISD::MUL)
      return SDValue();
  }

  // The ADDE combines the sign bits of the product (sra Mul, 31) with the
  // HIGH word of the accumulator. Again accept either operand order.
  SDValue SRA = AddeNode->getOperand(0);
  SDValue Hi = AddeNode->getOperand(1);
  if (SRA.getOpcode() != ISD::SRA) {
    SRA = AddeNode->getOperand(1);
    Hi = AddeNode->getOperand(0);
    if (SRA.getOpcode() != ISD::SRA)
      return SDValue();
  }

  // The shift must be by the constant 31 and must be applied to the very same
  // multiply that feeds the ADDC.
  auto *ShiftAmt = dyn_cast<ConstantSDNode>(SRA.getOperand(1));
  if (!ShiftAmt || ShiftAmt->getZExtValue() != 31)
    return SDValue();

  if (SRA.getOperand(0) != Mul)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(AddcNode);
  unsigned Opcode = 0;
  SDValue Op0;
  SDValue Op1;

  // Pick the variant from how each multiplicand is formed. For a top-half
  // operand, the value being shifted is passed to the node instead of the
  // shift itself.
  SDValue MulLHS = Mul.getOperand(0);
  SDValue MulRHS = Mul.getOperand(1);
  if (isS16(MulLHS, DAG) && isS16(MulRHS, DAG)) {
    Opcode = ARMISD::SMLALBB;
    Op0 = MulLHS;
    Op1 = MulRHS;
  } else if (isS16(MulLHS, DAG) && isSRA16(MulRHS)) {
    Opcode = ARMISD::SMLALBT;
    Op0 = MulLHS;
    Op1 = MulRHS.getOperand(0);
  } else if (isSRA16(MulLHS) && isS16(MulRHS, DAG)) {
    Opcode = ARMISD::SMLALTB;
    Op0 = MulLHS.getOperand(0);
    Op1 = MulRHS;
  } else if (isSRA16(MulLHS) && isSRA16(MulRHS)) {
    Opcode = ARMISD::SMLALTT;
    Op0 = MulLHS.getOperand(0);
    Op1 = MulRHS.getOperand(0);
  }

  // No variant matched.
  if (!Op0 || !Op1)
    return SDValue();

  // Node operands are (Rn, Rm, RLo, RHi); results are (RdLo, RdHi).
  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
                              Op0, Op1, Lo, Hi);

  // Replace the uses of the two add nodes with the SMLAL node's results.
  SDValue LoMLALResult(SMLAL.getNode(), 0);
  SDValue HiMLALResult(SMLAL.getNode(), 1);

  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

  // Return original node to notify the driver to stop replacing.
  return SDValue(AddcNode, 0);
}
|
||||
|
||||
static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const ARMSubtarget *Subtarget) {
|
||||
// Look for multiply add opportunities.
|
||||
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
|
||||
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
|
||||
|
@ -9535,12 +9621,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
|
|||
AddcNode->getValueType(0) == MVT::i32 &&
|
||||
"Expect ADDC with two result values. First: i32");
|
||||
|
||||
// Check that the ADDC adds the low result of the S/UMUL_LOHI.
|
||||
// Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
|
||||
// may be an SMLAL which multiplies two 16-bit values.
|
||||
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
|
||||
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
|
||||
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
|
||||
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
|
||||
return SDValue();
|
||||
return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
|
||||
|
||||
// Check for the triangle shape.
|
||||
SDValue AddeOp0 = AddeNode->getOperand(0);
|
||||
|
@ -9628,7 +9715,7 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
|
|||
// as the addend, and it's handled in PerformUMLALCombine.
|
||||
|
||||
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
|
||||
return AddCombineTo64bitMLAL(AddeNode, DCI);
|
||||
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
|
||||
|
||||
// Check that we have a glued ADDC node.
|
||||
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
|
||||
|
@ -9645,7 +9732,7 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
|
|||
UmlalNode = AddcNode->getOperand(1).getNode();
|
||||
AddHi = AddcNode->getOperand(0);
|
||||
} else {
|
||||
return AddCombineTo64bitMLAL(AddeNode, DCI);
|
||||
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
|
||||
}
|
||||
|
||||
// The ADDC should be glued to an ADDE node, which uses the same UMLAL as
|
||||
|
@ -11894,6 +11981,42 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return SDValue();
|
||||
break;
|
||||
}
|
||||
case ARMISD::SMLALBB: {
|
||||
unsigned BitWidth = N->getValueType(0).getSizeInBits();
|
||||
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
|
||||
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
|
||||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
case ARMISD::SMLALBT: {
|
||||
unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
|
||||
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
|
||||
unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
|
||||
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
|
||||
if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
|
||||
(SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
case ARMISD::SMLALTB: {
|
||||
unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
|
||||
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
|
||||
unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
|
||||
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
|
||||
if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
|
||||
(SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
case ARMISD::SMLALTT: {
|
||||
unsigned BitWidth = N->getValueType(0).getSizeInBits();
|
||||
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
|
||||
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
|
||||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
case ISD::INTRINSIC_VOID:
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
||||
|
|
|
@ -180,6 +180,10 @@ class InstrItineraryData;
|
|||
UMLAL, // 64bit Unsigned Accumulate Multiply
|
||||
SMLAL, // 64bit Signed Accumulate Multiply
|
||||
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
|
||||
SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
|
||||
SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
|
||||
SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
|
||||
SMLALTT, // 64-bit signed accumulate multiply top, top 16
|
||||
|
||||
// Operands of the standard BUILD_VECTOR node are not legalized, which
|
||||
// is fine if BUILD_VECTORs are always lowered to shuffles or other
|
||||
|
|
|
@ -92,6 +92,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
|
|||
SDTCisVT<1, i32>,
|
||||
SDTCisVT<4, i32>]>;
|
||||
|
||||
def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisSameAs<0, 4>,
|
||||
SDTCisSameAs<0, 5>]>;
|
||||
|
||||
// Node definitions.
|
||||
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
|
||||
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
|
||||
|
@ -185,6 +192,10 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
|
|||
|
||||
def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
|
||||
def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
|
||||
def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
|
||||
def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
|
||||
def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
|
||||
def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ARM Instruction Predicate Definitions.
|
||||
|
@ -4183,29 +4194,28 @@ defm SMUL : AI_smul<"smul">;
|
|||
defm SMLA : AI_smla<"smla">;
|
||||
|
||||
// Halfword multiply accumulate long: SMLAL<x><y>.
|
||||
def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
|
||||
(ins GPRnopc:$Rn, GPRnopc:$Rm),
|
||||
IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||
Requires<[IsARM, HasV5TE]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
class SMLAL<bits<2> opc1, string asm>
|
||||
: AMulxyI64<0b0001010, opc1,
|
||||
(outs GPRnopc:$RdLo, GPRnopc:$RdHi),
|
||||
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
|
||||
IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
|
||||
Requires<[IsARM, HasV5TE]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
|
||||
def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
|
||||
(ins GPRnopc:$Rn, GPRnopc:$Rm),
|
||||
IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||
Requires<[IsARM, HasV5TE]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
def SMLALBB : SMLAL<0b00, "smlalbb">;
|
||||
def SMLALBT : SMLAL<0b10, "smlalbt">;
|
||||
def SMLALTB : SMLAL<0b01, "smlaltb">;
|
||||
def SMLALTT : SMLAL<0b11, "smlaltt">;
|
||||
|
||||
def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
|
||||
(ins GPRnopc:$Rn, GPRnopc:$Rm),
|
||||
IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||
Requires<[IsARM, HasV5TE]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
|
||||
def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
|
||||
(ins GPRnopc:$Rn, GPRnopc:$Rm),
|
||||
IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
|
||||
Requires<[IsARM, HasV5TE]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(SMLALBB $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(SMLALBT $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(SMLALTB $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(SMLALTT $Rn, $Rm, $RLo, $RHi)>;
|
||||
|
||||
// Helper class for AI_smld.
|
||||
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
|
||||
|
|
|
@ -2732,19 +2732,24 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra,
|
|||
(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
|
||||
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
|
||||
|
||||
class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
|
||||
: T2FourReg_mac<1, op22_20, op7_4,
|
||||
(outs rGPR:$Ra, rGPR:$Rd),
|
||||
(ins rGPR:$Rn, rGPR:$Rm),
|
||||
IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
|
||||
Requires<[IsThumb2, HasDSP]>,
|
||||
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
|
||||
|
||||
// Halfword multiply accumulate long: SMLAL<x><y>
|
||||
def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
|
||||
def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
|
||||
def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
|
||||
def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
|
||||
def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
|
||||
Requires<[IsThumb2, HasDSP]>;
|
||||
def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">,
|
||||
Requires<[IsThumb2, HasDSP]>;
|
||||
def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">,
|
||||
Requires<[IsThumb2, HasDSP]>;
|
||||
def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">,
|
||||
Requires<[IsThumb2, HasDSP]>;
|
||||
|
||||
def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(t2SMLALBB $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(t2SMLALBT $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(t2SMLALTB $Rn, $Rm, $RLo, $RHi)>;
|
||||
def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
|
||||
(t2SMLALTT $Rn, $Rm, $RLo, $RHi)>;
|
||||
|
||||
class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
|
||||
: T2ThreeReg_mac<0, op22_20, op7_4,
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
|
||||
; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
|
||||
; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB
|
||||
; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2
|
||||
; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB
|
||||
; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-T2-DSP
|
||||
; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-T2-DSP
|
||||
; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
|
||||
; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB
|
||||
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB
|
||||
; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB
|
||||
; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-T2-DSP
|
||||
; RUN: llc -mtriple=armv5te-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V5TE
|
||||
; Check generated signed and unsigned multiply accumulate long.
|
||||
|
||||
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
|
||||
|
@ -20,12 +21,9 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
|
|||
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-BE: mov r0, [[RDHI]]
|
||||
;CHECK-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-T2-DSP: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
|
||||
;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
|
||||
|
@ -44,12 +42,9 @@ define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
|
|||
;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-BE: mov r0, [[RDHI]]
|
||||
;CHECK-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-T2-DSP: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
|
||||
;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
|
||||
|
@ -78,8 +73,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
|
|||
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
|
||||
;CHECK-BE: mov r0, [[RDHI]]
|
||||
;CHECK-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: umlal
|
||||
;CHECK-V7-THUMB: umlal
|
||||
;CHECK-T2-DSP: umlal
|
||||
;CHECK-V6-THUMB-NOT: umlal
|
||||
%conv = zext i32 %b to i64
|
||||
%conv1 = zext i32 %a to i64
|
||||
|
@ -92,8 +86,7 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
|
|||
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
|
||||
;CHECK-LABEL: MACLongTest4:
|
||||
;CHECK-V6-THUMB-NOT: smlal
|
||||
;CHECK-V6-THUMB2: smlal
|
||||
;CHECK-V7-THUMB: smlal
|
||||
;CHECK-T2-DSP: smlal
|
||||
;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
|
||||
;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
|
||||
;CHECK-LE: mov r0, [[RDLO]]
|
||||
|
@ -118,10 +111,8 @@ define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
|
|||
;CHECK: smlal r12, lr, r3, r2
|
||||
;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
|
||||
;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
|
||||
;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
|
||||
;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
|
||||
;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
|
||||
;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
|
||||
;CHECK-T2-DSP: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
|
||||
;CHECK-T2-DSP: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
|
||||
%conv = sext i32 %a to i64
|
||||
%conv1 = sext i32 %b to i64
|
||||
%mul = mul nsw i64 %conv1, %conv
|
||||
|
@ -172,18 +163,12 @@ define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
|
|||
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-T2-DSP: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-T2-DSP-NEXT: mov r0, [[RDLO]]
|
||||
;CHECK-T2-DSP-NEXT: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-NOT:umaal
|
||||
;CHECK-V6-THUMB-NOT: umaal
|
||||
;CHECK-V6M-THUMB-NOT: umaal
|
||||
|
@ -206,18 +191,12 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
|
|||
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
|
||||
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
|
||||
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-T2-DSP: umaal r2, r3, r1, r0
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r2
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r3
|
||||
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
|
||||
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
|
||||
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
|
||||
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
|
||||
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
|
||||
;CHECK-NOT:umaal
|
||||
;CHECK-V6-THUMB-NOT:umaal
|
||||
;CHECK-V6M-THUMB-NOT: umaal
|
||||
|
@ -231,3 +210,188 @@ define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
|
|||
%add2 = add i64 %add, %mul
|
||||
ret i64 %add2
|
||||
}
|
||||
|
||||
define i64 @MACLongTest11(i16 %a, i16 %b, i64 %c) {
|
||||
;CHECK-LABEL: MACLongTest11:
|
||||
;CHECK-T2-DSP-NOT: sxth
|
||||
;CHECK-T2-DSP: smlalbb r3, r2
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-V5TE-NOT: sxth
|
||||
;CHECK-V5TE: smlalbb r3, r2
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE-NOT: sxth
|
||||
;CHECK-V7-LE: smlalbb r3, r2
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlalbb r2, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlalbb
|
||||
;CHECK-BE-NOT: smlalbb
|
||||
;CHECK-V6M-THUMB-NOT: smlalbb
|
||||
;CHECK-V7M-THUMB-NOT: smlalbb
|
||||
%conv = sext i16 %a to i32
|
||||
%conv1 = sext i16 %b to i32
|
||||
%mul = mul nsw i32 %conv1, %conv
|
||||
%conv2 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv2, %c
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
define i64 @MACLongTest12(i16 %b, i32 %t, i64 %c) {
|
||||
;CHECK-LABEL: MACLongTest12:
|
||||
;CHECK-T2-DSP-NOT: sxth
|
||||
;CHECK-T2-DSP-NOT: {{asr|lsr}}
|
||||
;CHECK-T2-DSP: smlalbt r3, r2, r0, r1
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-T2-DSP-NOT: sxth
|
||||
;CHECK-V5TE-NOT: sxth
|
||||
;CHECK-V5TE-NOT: {{asr|lsr}}
|
||||
;CHECK-V5TE: smlalbt r3, r2, r0, r1
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE-NOT: sxth
|
||||
;CHECK-V7-LE-NOT: {{asr|lsr}}
|
||||
;CHECK-V7-LE: smlalbt r3, r2, r0, r1
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlalbt r2, r3,
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlalbt
|
||||
;CHECK-BE-NOT: smlalbt
|
||||
;CHECK-V6M-THUMB-NOT: smlalbt
|
||||
;CHECK-V7M-THUMB-NOT: smlalbt
|
||||
%conv0 = sext i16 %b to i32
|
||||
%conv1 = ashr i32 %t, 16
|
||||
%mul = mul nsw i32 %conv0, %conv1
|
||||
%conv2 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv2, %c
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
define i64 @MACLongTest13(i32 %t, i16 %b, i64 %c) {
|
||||
;CHECK-LABEL: MACLongTest13:
|
||||
;CHECK-T2-DSP-NOT: sxth
|
||||
;CHECK-T2-DSP-NOT: {{asr|lsr}}
|
||||
;CHECK-T2-DSP: smlaltb r3, r2, r0, r1
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-V5TE-NOT: sxth
|
||||
;CHECK-V5TE-NOT: {{asr|lsr}}
|
||||
;CHECK-V5TE: smlaltb r3, r2, r0, r1
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE-NOT: sxth
|
||||
;CHECK-V7-LE-NOT: {{asr|lsr}}
|
||||
;CHECK-V7-LE: smlaltb r3, r2, r0, r1
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlaltb r2, r3, r0, r1
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlaltb
|
||||
;CHECK-BE-NOT: smlaltb
|
||||
;CHECK-V6M-THUMB-NOT: smlaltb
|
||||
;CHECK-V7M-THUMB-NOT: smlaltb
|
||||
%conv0 = ashr i32 %t, 16
|
||||
%conv1= sext i16 %b to i32
|
||||
%mul = mul nsw i32 %conv0, %conv1
|
||||
%conv2 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv2, %c
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
define i64 @MACLongTest14(i32 %a, i32 %b, i64 %c) {
|
||||
;CHECK-LABEL: MACLongTest14:
|
||||
;CHECK-T2-DSP-NOT: {{asr|lsr}}
|
||||
;CHECK-T2-DSP: smlaltt r3, r2,
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-V5TE-NOT: {{asr|lsr}}
|
||||
;CHECK-V5TE: smlaltt r3, r2,
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE-NOT: {{asr|lsr}}
|
||||
;CHECK-V7-LE: smlaltt r3, r2,
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlaltt r2, r3,
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlaltt
|
||||
;CHECK-BE-NOT: smlaltt
|
||||
;CHECK-V6M-THUMB-NOT: smlaltt
|
||||
;CHECK-V7M-THUMB-NOT: smlaltt
|
||||
%conv0 = ashr i32 %a, 16
|
||||
%conv1 = ashr i32 %b, 16
|
||||
%mul = mul nsw i32 %conv1, %conv0
|
||||
%conv2 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv2, %c
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
@global_b = external global i16, align 2
|
||||
;CHECK-LABEL: MACLongTest15
|
||||
;CHECK-T2-DSP-NOT: {{asr|lsr}}
|
||||
;CHECK-T2-DSP: smlaltb r3, r2, r0, r1
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-V5TE-NOT: {{asr|lsr}}
|
||||
;CHECK-V5TE: smlaltb r3, r2, r0, r1
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE-NOT: {{asr|lsr}}
|
||||
;CHECK-V7-LE: smlaltb r3, r2, r0, r1
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlaltb r2, r3, r0, r1
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlaltb
|
||||
;CHECK-BE-NOT: smlaltb
|
||||
;CHECK-V6M-THUMB-NOT: smlaltb
|
||||
;CHECK-V7M-THUMB-NOT: smlaltb
|
||||
define i64 @MACLongTest15(i32 %t, i64 %acc) {
|
||||
entry:
|
||||
%0 = load i16, i16* @global_b, align 2
|
||||
%conv = sext i16 %0 to i32
|
||||
%shr = ashr i32 %t, 16
|
||||
%mul = mul nsw i32 %shr, %conv
|
||||
%conv1 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv1, %acc
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
;CHECK-LABEL: MACLongTest16
|
||||
;CHECK-T2-DSP-NOT: {{asr|lsr}}
|
||||
;CHECK-T2-DSP: smlalbt r3, r2, r1, r0
|
||||
;CHECK-T2-DSP-NEXT: mov r0, r3
|
||||
;CHECK-T2-DSP-NEXT: mov r1, r2
|
||||
;CHECK-V5TE-NOT: {{asr|lsr}}
|
||||
;CHECK-V5TE: smlalbt r3, r2, r1, r0
|
||||
;CHECK-V5TE-NEXT: mov r0, r3
|
||||
;CHECK-V5TE-NEXT: mov r1, r2
|
||||
;CHECK-V7-LE: smlalbt r3, r2, r1, r0
|
||||
;CHECK-V7-LE-NEXT: mov r0, r3
|
||||
;CHECK-V7-LE-NEXT: mov r1, r2
|
||||
;CHECK-V7-THUMB-BE: smlalbt r2, r3, r1, r0
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r0, r3
|
||||
;CHECK-V7-THUMB-BE-NEXT: mov r1, r2
|
||||
;CHECK-LE-NOT: smlalbt
|
||||
;CHECK-BE-NOT: smlalbt
|
||||
;CHECK-V6M-THUMB-NOT: smlalbt
|
||||
;CHECK-V7M-THUMB-NOT: smlalbt
|
||||
define i64 @MACLongTest16(i32 %t, i64 %acc) {
|
||||
entry:
|
||||
%0 = load i16, i16* @global_b, align 2
|
||||
%conv = sext i16 %0 to i32
|
||||
%shr = ashr i32 %t, 16
|
||||
%mul = mul nsw i32 %conv, %shr
|
||||
%conv1 = sext i32 %mul to i64
|
||||
%add = add nsw i64 %conv1, %acc
|
||||
ret i64 %add
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue