forked from OSchip/llvm-project
[Intrinsic] Signed Fixed Point Multiplication Intrinsic
Add an intrinsic that takes 2 signed integers with the scale of them provided as the third argument and performs fixed point multiplication on them. This is a part of implementing fixed point arithmetic in clang where some of the more complex operations will be implemented as intrinsics. Differential Revision: https://reviews.llvm.org/D54719 llvm-svn: 348912
This commit is contained in:
parent
2000170e27
commit
118e53fd63
|
@ -12772,6 +12772,76 @@ Examples
|
|||
%res = call i4 @llvm.usub.sat.i4(i4 2, i4 6) ; %res = 0
|
||||
|
||||
|
||||
Fixed Point Arithmetic Intrinsics
|
||||
---------------------------------
|
||||
|
||||
A fixed point number represents a real data type for a number that has a fixed
|
||||
number of digits after a radix point (equivalent to the decimal point '.').
|
||||
The number of digits after the radix point is referred to as the ``scale``. These
|
||||
are useful for representing fractional values to a specific precision. The
|
||||
following intrinsics perform fixed point arithmetic operations on 2 operands
|
||||
of the same scale, specified as the third argument.
|
||||
|
||||
|
||||
'``llvm.smul.fix.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax
|
||||
"""""""
|
||||
|
||||
This is an overloaded intrinsic. You can use ``llvm.smul.fix``
|
||||
on any integer bit width or vectors of integers.
|
||||
|
||||
::
|
||||
|
||||
declare i16 @llvm.smul.fix.i16(i16 %a, i16 %b, i32 %scale)
|
||||
declare i32 @llvm.smul.fix.i32(i32 %a, i32 %b, i32 %scale)
|
||||
declare i64 @llvm.smul.fix.i64(i64 %a, i64 %b, i32 %scale)
|
||||
declare <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
|
||||
|
||||
Overview
|
||||
"""""""""
|
||||
|
||||
The '``llvm.smul.fix``' family of intrinsic functions perform signed
|
||||
fixed point multiplication on 2 arguments of the same scale.
|
||||
|
||||
Arguments
|
||||
""""""""""
|
||||
|
||||
The arguments (%a and %b) and the result may be of integer types of any bit
|
||||
width, but they must have the same bit width. ``%a`` and ``%b`` are the two
|
||||
values that will undergo signed fixed point multiplication. The argument
|
||||
``%scale`` represents the scale of both operands, and must be a constant
|
||||
integer.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
This operation performs fixed point multiplication on the 2 arguments of a
|
||||
specified scale. The result will also be returned in the same scale specified
|
||||
in the third argument.
|
||||
|
||||
If the result value cannot be precisely represented in the given scale, the
|
||||
value is rounded up or down to the closest representable value. The rounding
|
||||
direction is unspecified.
|
||||
|
||||
It is undefined behavior if the result value does not fit within the range of
|
||||
the fixed point type.
|
||||
|
||||
|
||||
Examples
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 2, i32 0) ; %res = 6 (2 x 3 = 6)
|
||||
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 2, i32 1) ; %res = 3 (1.5 x 1 = 1.5)
|
||||
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 x -1 = -1.5)
|
||||
|
||||
; The result in the following could be rounded up to -2 or down to -2.5
|
||||
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 -3, i32 1) ; %res = -5 (or -4) (1.5 x -1.5 = -2.25)
|
||||
|
||||
|
||||
Specialised Arithmetic Intrinsics
|
||||
---------------------------------
|
||||
|
||||
|
|
|
@ -272,6 +272,13 @@ namespace ISD {
|
|||
/// resulting value is this minimum value.
|
||||
SSUBSAT, USUBSAT,
|
||||
|
||||
/// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
|
||||
/// 2 integers with the same width and scale. SCALE represents the scale of
|
||||
/// both operands as fixed point numbers. This SCALE parameter must be a
|
||||
/// constant integer. A scale of zero is effectively performing
|
||||
/// multiplication on 2 integers.
|
||||
SMULFIX,
|
||||
|
||||
/// Simple binary floating point operators.
|
||||
FADD, FSUB, FMUL, FDIV, FREM,
|
||||
|
||||
|
|
|
@ -805,6 +805,38 @@ public:
|
|||
return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
|
||||
}
|
||||
|
||||
/// Custom method defined by each target to indicate if an operation which
|
||||
/// may require a scale is supported natively by the target.
|
||||
/// If not, the operation is illegal.
|
||||
virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
|
||||
unsigned Scale) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Some fixed point operations may be natively supported by the target but
|
||||
/// only for specific scales. This method allows for checking
|
||||
/// if the width is supported by the target for a given operation that may
|
||||
/// depend on scale.
|
||||
LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
|
||||
unsigned Scale) const {
|
||||
auto Action = getOperationAction(Op, VT);
|
||||
if (Action != Legal)
|
||||
return Action;
|
||||
|
||||
// This operation is supported in this type but may only work on specific
|
||||
// scales.
|
||||
bool Supported;
|
||||
switch (Op) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected fixed point operation.");
|
||||
case ISD::SMULFIX:
|
||||
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
|
||||
break;
|
||||
}
|
||||
|
||||
return Supported ? Action : Expand;
|
||||
}
|
||||
|
||||
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
|
||||
unsigned EqOpc;
|
||||
switch (Op) {
|
||||
|
@ -3775,10 +3807,15 @@ public:
|
|||
SDValue Index) const;
|
||||
|
||||
/// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
|
||||
/// method accepts integers or vectors of integers as its arguments.
|
||||
/// method accepts integers as its arguments.
|
||||
SDValue getExpandedSaturationAdditionSubtraction(SDNode *Node,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
|
||||
/// integers as its arguments.
|
||||
SDValue getExpandedFixedPointMultiplication(SDNode *Node,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instruction Emitting Hooks
|
||||
//
|
||||
|
|
|
@ -811,7 +811,7 @@ def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
|
|||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
//===------------------------- Fixed Point Intrinsics ---------------------===//
|
||||
//===------------------------- Saturation Arithmetic Intrinsics ---------------------===//
|
||||
//
|
||||
def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
|
@ -826,6 +826,12 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty],
|
|||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
//===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===//
|
||||
//
|
||||
def int_smul_fix : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, Commutative]>;
|
||||
|
||||
//===------------------------- Memory Use Markers -------------------------===//
|
||||
//
|
||||
def int_lifetime_start : Intrinsic<[],
|
||||
|
|
|
@ -125,6 +125,9 @@ def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift
|
|||
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
|
||||
]>;
|
||||
def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc.
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
|
||||
|
@ -382,6 +385,7 @@ def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
|
|||
def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
|
||||
def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
|
||||
def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
|
||||
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
|
||||
|
||||
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
|
||||
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
|
||||
|
|
|
@ -1128,6 +1128,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
|||
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
|
||||
break;
|
||||
}
|
||||
case ISD::SMULFIX: {
|
||||
unsigned Scale = Node->getConstantOperandVal(2);
|
||||
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
|
||||
Node->getValueType(0), Scale);
|
||||
break;
|
||||
}
|
||||
case ISD::MSCATTER:
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
|
||||
|
@ -3276,6 +3282,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
|||
Results.push_back(TLI.getExpandedSaturationAdditionSubtraction(Node, DAG));
|
||||
break;
|
||||
}
|
||||
case ISD::SMULFIX: {
|
||||
Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
|
||||
break;
|
||||
}
|
||||
case ISD::SADDO:
|
||||
case ISD::SSUBO: {
|
||||
SDValue LHS = Node->getOperand(0);
|
||||
|
|
|
@ -147,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::UADDSAT:
|
||||
case ISD::SSUBSAT:
|
||||
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
|
||||
case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
|
||||
|
||||
case ISD::ATOMIC_LOAD:
|
||||
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
|
||||
|
@ -625,6 +626,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
|
|||
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
|
||||
}
|
||||
|
||||
/// Promote the result of a fixed point multiplication: both multiplicands are
/// fetched through SExtPromotedInteger and the same opcode is re-issued on the
/// promoted type, with the scale operand passed through untouched.
SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
  SDValue Scale = N->getOperand(2);

  // Nothing else to adjust; the operation simply continues in the wider type.
  return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
                     Scale);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
|
||||
if (ResNo == 1)
|
||||
return PromoteIntRes_Overflow(N);
|
||||
|
@ -1056,6 +1067,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
|
|||
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
|
||||
|
||||
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
|
||||
|
||||
case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
|
||||
}
|
||||
|
||||
// If the result is null, the sub-method took care of registering results etc.
|
||||
|
@ -1415,6 +1428,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
|
|||
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
|
||||
}
|
||||
|
||||
/// Promote the scale operand (operand 2) of a fixed point multiplication via
/// ZExtPromotedInteger and update the node in place; the two multiplicands
/// are left as they are.
SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
  SDValue PromotedScale = ZExtPromotedInteger(N->getOperand(2));
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  SDNode *Updated = DAG.UpdateNodeOperands(N, LHS, RHS, PromotedScale);
  return SDValue(Updated, 0);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
|
||||
// Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
|
||||
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
|
||||
|
@ -1571,6 +1590,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::UADDSAT:
|
||||
case ISD::SSUBSAT:
|
||||
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
|
||||
case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
|
||||
}
|
||||
|
||||
// If Lo/Hi is null, the sub-method took care of registering results etc.
|
||||
|
@ -2539,6 +2559,95 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
|
|||
SplitInteger(Result, Lo, Hi);
|
||||
}
|
||||
|
||||
/// Expand an ISD::SMULFIX whose result type has to be split into two halves.
///
/// \param N   the SMULFIX node; operand 2 is read as the constant scale.
/// \param Lo  receives the low half of the scaled product.
/// \param Hi  receives the high half of the scaled product.
///
/// A scale of zero is emitted as a plain ISD::MUL that is then split.
/// Otherwise the full product is built in four parts with expandMUL_LOHI and
/// the two parts that remain after shifting right by the scale become Lo/Hi.
/// Failure to build the product is a fatal error.
void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  uint64_t Scale = N->getConstantOperandVal(2);
  if (!Scale) {
    // No fractional bits: this is an ordinary integer multiplication.
    SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    SplitInteger(Result, Lo, Hi);
    return;
  }

  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDValue LL, LH, RL, RH;
  GetExpandedInteger(LHS, LL, LH);
  GetExpandedInteger(RHS, RL, RH);
  SmallVector<SDValue, 4> Result;

  // report_fatal_error() does not return, so nothing follows it here.
  if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
                          TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
                          LL, LH, RL, RH))
    report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");

  unsigned VTSize = VT.getScalarSizeInBits();
  unsigned NVTSize = NVT.getScalarSizeInBits();
  EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());

  // The four parts of the full product, least significant first.
  SDValue ResultLL = Result[0];
  SDValue ResultLH = Result[1];
  SDValue ResultHL = Result[2];
  SDValue ResultHH = Result[3];

  // After getting the multiplication result in 4 parts, we need to perform a
  // shift right by the amount of the scale to get the result in that scale.
  // Let's say we multiply 2 64 bit numbers. The resulting value can be held in
  // 128 bits that are cut into 4 32-bit parts:
  //
  //      HH       HL       LH       LL
  //  |---32---|---32---|---32---|---32---|
  // 128      96       64       32        0
  //
  //                    |------VTSize-----|
  //
  //                             |NVTSize-|
  //
  // The resulting Lo and Hi will only need to be one of these 32-bit parts
  // after shifting.
  if (Scale < NVTSize) {
    // If the scale is less than the size of the VT we expand to, the Hi and
    // Lo of the result will be in the first 2 parts of the result after
    // shifting right. This only requires shifting by the scale as far as the
    // third part in the result (ResultHL).
    SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
    SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
    Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
    Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
                     DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
    Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
                     DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
  } else if (Scale == NVTSize) {
    // If the scale equals the expanded VT size, Lo and Hi are ResultLH and
    // ResultHL, respectively. Avoid shifting to prevent undefined behavior.
    Lo = ResultLH;
    Hi = ResultHL;
  } else if (Scale < VTSize) {
    // If the scale is instead less than the old VT size, but strictly greater
    // than the expanded VT size (the equal case is handled above), the first
    // part of the result (ResultLL) is no longer a part of Lo because it
    // would be scaled out anyway. Instead we can start shifting right from
    // the fourth part (ResultHH) to the second part (ResultLH), and the
    // shifted ResultLH will be the new Lo.
    SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
    SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
    Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
    Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
                     DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
    Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
                     DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
  } else {
    llvm_unreachable(
        "Expected the scale to be less than the width of the operands");
  }
}
|
||||
|
||||
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
|
||||
SDValue &Lo, SDValue &Hi) {
|
||||
SDValue LHS = Node->getOperand(0);
|
||||
|
|
|
@ -345,6 +345,7 @@ private:
|
|||
SDValue PromoteIntRes_VAARG(SDNode *N);
|
||||
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
|
||||
SDValue PromoteIntRes_SMULFIX(SDNode *N);
|
||||
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
|
||||
|
||||
// Integer Operand Promotion.
|
||||
|
@ -378,6 +379,7 @@ private:
|
|||
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
|
||||
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
|
||||
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
|
||||
SDValue PromoteIntOp_SMULFIX(SDNode *N);
|
||||
|
||||
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
|
||||
|
||||
|
@ -433,6 +435,7 @@ private:
|
|||
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
||||
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
||||
|
@ -688,6 +691,8 @@ private:
|
|||
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
|
||||
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
|
||||
|
||||
SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
|
||||
|
||||
// Vector Operand Scalarization: <1 x ty> -> ty.
|
||||
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
|
||||
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
|
||||
|
@ -723,6 +728,8 @@ private:
|
|||
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
||||
void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
||||
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
|
|
@ -414,6 +414,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
|||
case ISD::USUBSAT:
|
||||
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
|
||||
break;
|
||||
case ISD::SMULFIX: {
|
||||
unsigned Scale = Node->getConstantOperandVal(2);
|
||||
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
|
||||
Node->getValueType(0), Scale);
|
||||
break;
|
||||
}
|
||||
case ISD::FP_ROUND_INREG:
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
cast<VTSDNode>(Node->getOperand(1))->getVT());
|
||||
|
|
|
@ -172,6 +172,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::STRICT_FTRUNC:
|
||||
R = ScalarizeVecRes_StrictFPOp(N);
|
||||
break;
|
||||
case ISD::SMULFIX:
|
||||
R = ScalarizeVecRes_SMULFIX(N);
|
||||
break;
|
||||
}
|
||||
|
||||
// If R is null, the sub-method took care of registering the result.
|
||||
|
@ -194,6 +197,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
|
|||
Op0.getValueType(), Op0, Op1, Op2);
|
||||
}
|
||||
|
||||
/// Scalarize a single-element vector SMULFIX: the two multiplicands are
/// replaced by their scalarized values and the scale operand is reused as is.
SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
  SDLoc dl(N);
  SDValue LHS = GetScalarizedVector(N->getOperand(0));
  SDValue RHS = GetScalarizedVector(N->getOperand(1));
  EVT EltVT = LHS.getValueType();
  return DAG.getNode(N->getOpcode(), dl, EltVT, LHS, RHS, N->getOperand(2));
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
|
||||
EVT VT = N->getValueType(0).getVectorElementType();
|
||||
unsigned NumOpers = N->getNumOperands();
|
||||
|
@ -848,6 +859,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
|||
case ISD::STRICT_FTRUNC:
|
||||
SplitVecRes_StrictFPOp(N, Lo, Hi);
|
||||
break;
|
||||
case ISD::SMULFIX:
|
||||
SplitVecRes_SMULFIX(N, Lo, Hi);
|
||||
break;
|
||||
}
|
||||
|
||||
// If Lo/Hi is null, the sub-method took care of registering results etc.
|
||||
|
@ -885,6 +899,20 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
|
|||
Op0Hi, Op1Hi, Op2Hi);
|
||||
}
|
||||
|
||||
/// Split a vector SMULFIX into two half-width operations. Each half multiplies
/// the matching halves of the two vector operands; the scale operand is
/// shared by both halves unchanged.
void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
                                           SDValue &Hi) {
  const unsigned Opcode = N->getOpcode();
  SDValue Scale = N->getOperand(2);
  SDLoc dl(N);

  SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
  GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
  GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);

  Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Scale);
  Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Scale);
}
|
||||
|
||||
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
|
||||
SDValue &Hi) {
|
||||
// We know the result is a vector. The input may be either a vector or a
|
||||
|
|
|
@ -5832,6 +5832,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
|
|||
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::smul_fix: {
|
||||
SDValue Op1 = getValue(I.getArgOperand(0));
|
||||
SDValue Op2 = getValue(I.getArgOperand(1));
|
||||
SDValue Op3 = getValue(I.getArgOperand(2));
|
||||
setValue(&I,
|
||||
DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::stacksave: {
|
||||
SDValue Op = getRoot();
|
||||
Res = DAG.getNode(
|
||||
|
|
|
@ -297,6 +297,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
|||
case ISD::UADDSAT: return "uaddsat";
|
||||
case ISD::SSUBSAT: return "ssubsat";
|
||||
case ISD::USUBSAT: return "usubsat";
|
||||
case ISD::SMULFIX: return "smulfix";
|
||||
|
||||
// Conversion operators.
|
||||
case ISD::SIGN_EXTEND: return "sign_extend";
|
||||
|
|
|
@ -4089,8 +4089,17 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
|
|||
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
|
||||
return false;
|
||||
|
||||
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
|
||||
Merge(Lo, Hi));
|
||||
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
|
||||
EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
|
||||
|
||||
bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
|
||||
isOperationLegalOrCustom(ISD::ADDE, VT));
|
||||
if (UseGlue)
|
||||
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
|
||||
Merge(Lo, Hi));
|
||||
else
|
||||
Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
|
||||
Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
|
||||
|
||||
SDValue Carry = Next.getValue(1);
|
||||
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
|
||||
|
@ -4099,9 +4108,13 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
|
|||
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
|
||||
return false;
|
||||
|
||||
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
|
||||
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
|
||||
Carry);
|
||||
if (UseGlue)
|
||||
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
|
||||
Carry);
|
||||
else
|
||||
Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
|
||||
Zero, Carry);
|
||||
|
||||
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
|
||||
|
||||
if (Opcode == ISD::SMUL_LOHI) {
|
||||
|
@ -5198,3 +5211,55 @@ SDValue TargetLowering::getExpandedSaturationAdditionSubtraction(
|
|||
return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the DAG expansion of a scalar ISD::SMULFIX node.
///
/// A zero scale yields a plain ISD::MUL. Otherwise the double-width signed
/// product is obtained either from SMUL_LOHI or from a MUL/MULHS pair
/// (whichever is legal or custom for the type), and the result is assembled
/// from the low half shifted right by the scale and the high half shifted
/// left by (bit width - scale). If neither multiply form is available this
/// is a fatal error.
SDValue
TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
                                                    SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
  assert(Node->getNumOperands() == 3 &&
         "Expected signed fixed point multiplication to have 3 operands.");

  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  assert(VT.isScalarInteger() &&
         "Expected operands to be integers. Vector of int arguments should "
         "already be unrolled.");
  assert(RHS.getValueType().isScalarInteger() &&
         "Expected operands to be integers. Vector of int arguments should "
         "already be unrolled.");
  assert(VT == RHS.getValueType() &&
         "Expected both operands to be the same type");

  unsigned Scale = Node->getConstantOperandVal(2);
  const unsigned BitWidth = VT.getScalarSizeInBits();
  assert(Scale < BitWidth &&
         "Expected scale to be less than the number of bits.");

  SDLoc dl(Node);

  // Without fractional bits the operation is a regular multiplication.
  if (Scale == 0)
    return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);

  // Decide up front how the two halves of the product can be produced.
  const bool HasMulLoHi = isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
  if (!HasMulLoHi && !isOperationLegalOrCustom(ISD::MULHS, VT))
    report_fatal_error("Unable to expand signed fixed point multiplication.");

  SDValue LoPart, HiPart;
  if (HasMulLoHi) {
    SDValue Product =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
    LoPart = Product.getValue(0);
    HiPart = Product.getValue(1);
  } else {
    LoPart = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    HiPart = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
  }

  // Both inputs carry the scale, so the double-width product has to be
  // shifted right by it once. Only part of each half ends up in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue LoShift = DAG.getConstant(Scale, dl, ShiftTy);
  LoPart = DAG.getNode(ISD::SRL, dl, VT, LoPart, LoShift);
  SDValue HiShift = DAG.getConstant(BitWidth - Scale, dl, ShiftTy);
  HiPart = DAG.getNode(ISD::SHL, dl, VT, HiPart, HiShift);
  return DAG.getNode(ISD::OR, dl, VT, LoPart, HiPart);
}
|
||||
|
|
|
@ -616,6 +616,7 @@ void TargetLoweringBase::initActions() {
|
|||
setOperationAction(ISD::UADDSAT, VT, Expand);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Expand);
|
||||
setOperationAction(ISD::USUBSAT, VT, Expand);
|
||||
setOperationAction(ISD::SMULFIX, VT, Expand);
|
||||
|
||||
// Overflow operations default to expand
|
||||
setOperationAction(ISD::SADDO, VT, Expand);
|
||||
|
|
|
@ -4541,6 +4541,24 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
|
|||
"of ints");
|
||||
break;
|
||||
}
|
||||
case Intrinsic::smul_fix: {
|
||||
Value *Op1 = CS.getArgOperand(0);
|
||||
Value *Op2 = CS.getArgOperand(1);
|
||||
Assert(Op1->getType()->isIntOrIntVectorTy(),
|
||||
"first operand of smul_fix must be an int type or vector "
|
||||
"of ints");
|
||||
Assert(Op2->getType()->isIntOrIntVectorTy(),
|
||||
"second operand of smul_fix must be an int type or vector "
|
||||
"of ints");
|
||||
|
||||
auto *Op3 = dyn_cast<ConstantInt>(CS.getArgOperand(2));
|
||||
Assert(Op3, "third argument of smul_fix must be a constant integer");
|
||||
Assert(Op3->getType()->getBitWidth() <= 32,
|
||||
"third argument of smul_fix must fit within 32 bits");
|
||||
Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
|
||||
"the scale of smul_fix must be less than the width of the operands");
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,458 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
|
||||
|
||||
; Overloads of the signed fixed-point multiplication intrinsic exercised below.
; The trailing i32 operand is the scale; every call in this file passes it as
; a constant (2, 0, 32 or 63).
declare i4 @llvm.smul.fix.i4 (i4, i4, i32)
|
||||
declare i32 @llvm.smul.fix.i32 (i32, i32, i32)
|
||||
declare i64 @llvm.smul.fix.i64 (i64, i64, i32)
|
||||
declare <4 x i32> @llvm.smul.fix.v4i32(<4 x i32>, <4 x i32>, i32)
|
||||
|
||||
; Scalar i32 multiplication with a scale of 2.
; The x86-64 run sign-extends both operands and does one 64-bit imulq; the
; i686 run uses the one-operand imull. Both then combine the high and low
; halves of the product with a double shift (shldl $30 / shrdl $2).
define i32 @func(i32 %x, i32 %y) nounwind {
|
||||
; X64-LABEL: func:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movslq %esi, %rax
|
||||
; X64-NEXT: movslq %edi, %rcx
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: shldl $30, %ecx, %eax
|
||||
; X64-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: shrdl $2, %edx, %eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 2);
|
||||
ret i32 %tmp;
|
||||
}
|
||||
|
||||
; Scalar i64 multiplication with a scale of 2.
; The x86-64 run needs only a one-operand imulq plus shrdq $2. The i686 run
; builds the product from several 32-bit mull/imull steps and finishes with
; two shldl $30 instructions.
; NOTE(review): unlike most neighbouring tests this function is not marked
; nounwind, which is presumably why the i686 checks contain .cfi directives.
define i64 @func2(i64 %x, i64 %y) {
|
||||
; X64-LABEL: func2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: imulq %rsi
|
||||
; X64-NEXT: shrdq $2, %rdx, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 20
|
||||
; X86-NEXT: .cfi_offset %esi, -20
|
||||
; X86-NEXT: .cfi_offset %edi, -16
|
||||
; X86-NEXT: .cfi_offset %ebx, -12
|
||||
; X86-NEXT: .cfi_offset %ebp, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ebx, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: movl %eax, %edi
|
||||
; X86-NEXT: movl %ebx, %eax
|
||||
; X86-NEXT: mull %ecx
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: movl %edx, %ebp
|
||||
; X86-NEXT: addl %edi, %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: adcl $0, %esi
|
||||
; X86-NEXT: movl %edi, %eax
|
||||
; X86-NEXT: mull %ecx
|
||||
; X86-NEXT: addl %ebp, %eax
|
||||
; X86-NEXT: adcl %esi, %edx
|
||||
; X86-NEXT: movl %edi, %esi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: addl %edx, %esi
|
||||
; X86-NEXT: movl %esi, %ebp
|
||||
; X86-NEXT: subl %ecx, %ebp
|
||||
; X86-NEXT: testl %edi, %edi
|
||||
; X86-NEXT: cmovnsl %esi, %ebp
|
||||
; X86-NEXT: movl %ebp, %edx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: cmovnsl %ebp, %edx
|
||||
; X86-NEXT: shldl $30, %eax, %edx
|
||||
; X86-NEXT: shldl $30, %ebx, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 16
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 12
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
|
||||
ret i64 %tmp;
|
||||
}
|
||||
|
||||
; Scalar i4 multiplication with a scale of 2.
; Both runs first sign-extend the 4-bit operands inside a byte register
; (shlb $4 / sarb $4), multiply with imull, and then assemble the result from
; the low byte shifted right by 2 and the next byte shifted left by 6.
define i4 @func3(i4 %x, i4 %y) nounwind {
|
||||
; X64-LABEL: func3:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: shlb $4, %dil
|
||||
; X64-NEXT: sarb $4, %dil
|
||||
; X64-NEXT: shlb $4, %sil
|
||||
; X64-NEXT: sarb $4, %sil
|
||||
; X64-NEXT: movsbl %sil, %ecx
|
||||
; X64-NEXT: movsbl %dil, %eax
|
||||
; X64-NEXT: imull %ecx, %eax
|
||||
; X64-NEXT: movl %eax, %ecx
|
||||
; X64-NEXT: shrb $2, %cl
|
||||
; X64-NEXT: shrl $8, %eax
|
||||
; X64-NEXT: shlb $6, %al
|
||||
; X64-NEXT: orb %cl, %al
|
||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func3:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: shlb $4, %al
|
||||
; X86-NEXT: sarb $4, %al
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: shlb $4, %cl
|
||||
; X86-NEXT: sarb $4, %cl
|
||||
; X86-NEXT: movsbl %cl, %ecx
|
||||
; X86-NEXT: movsbl %al, %eax
|
||||
; X86-NEXT: imull %ecx, %eax
|
||||
; X86-NEXT: shlb $6, %ah
|
||||
; X86-NEXT: shrb $2, %al
|
||||
; X86-NEXT: orb %ah, %al
|
||||
; X86-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2);
|
||||
ret i4 %tmp;
|
||||
}
|
||||
|
||||
; <4 x i32> multiplication with a scale of 2.
; The expected output handles the four lanes one at a time with scalar
; instructions: each lane gets its own imulq (x86-64 run) or one-operand
; imull (i686 run) followed by shldl $30, and the results are then
; reassembled into the vector return value.
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-LABEL: vec:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; X64-NEXT: movd %xmm2, %eax
|
||||
; X64-NEXT: cltq
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; X64-NEXT: movd %xmm2, %ecx
|
||||
; X64-NEXT: movslq %ecx, %rcx
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: shldl $30, %ecx, %eax
|
||||
; X64-NEXT: movd %eax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: movd %xmm3, %eax
|
||||
; X64-NEXT: cltq
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: movd %xmm3, %ecx
|
||||
; X64-NEXT: movslq %ecx, %rcx
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: shldl $30, %ecx, %eax
|
||||
; X64-NEXT: movd %eax, %xmm3
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
; X64-NEXT: movd %xmm1, %eax
|
||||
; X64-NEXT: cltq
|
||||
; X64-NEXT: movd %xmm0, %ecx
|
||||
; X64-NEXT: movslq %ecx, %rcx
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: shldl $30, %ecx, %eax
|
||||
; X64-NEXT: movd %eax, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: movd %xmm1, %eax
|
||||
; X64-NEXT: cltq
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X64-NEXT: movd %xmm0, %ecx
|
||||
; X64-NEXT: movslq %ecx, %rcx
|
||||
; X64-NEXT: imulq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: shrq $32, %rax
|
||||
; X64-NEXT: shldl $30, %ecx, %eax
|
||||
; X64-NEXT: movd %eax, %xmm0
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; X64-NEXT: movdqa %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: vec:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %ebp
|
||||
; X86-NEXT: shldl $30, %eax, %ebp
|
||||
; X86-NEXT: movl %ebx, %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %ebx
|
||||
; X86-NEXT: shldl $30, %eax, %ebx
|
||||
; X86-NEXT: movl %edi, %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: shldl $30, %eax, %edi
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: shldl $30, %eax, %edx
|
||||
; X86-NEXT: movl %edx, 12(%ecx)
|
||||
; X86-NEXT: movl %edi, 8(%ecx)
|
||||
; X86-NEXT: movl %ebx, 4(%ecx)
|
||||
; X86-NEXT: movl %ebp, (%ecx)
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl $4
|
||||
%tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2);
|
||||
ret <4 x i32> %tmp;
|
||||
}
|
||||
|
||||
; The next four tests (func4, func5, func6, vec2) use a scale of 0, which results in regular integer multiplication
|
||||
; Scalar i32 multiplication with a scale of 0.
; The expected output for both runs is a single two-operand imull with no
; shift afterwards.
define i32 @func4(i32 %x, i32 %y) nounwind {
|
||||
; X64-LABEL: func4:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: imull %esi, %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func4:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 0);
|
||||
ret i32 %tmp;
|
||||
}
|
||||
|
||||
; Scalar i64 multiplication with a scale of 0.
; The x86-64 run is a single imulq; the i686 run is one mull plus two imull
; cross terms added into the high half, with no shift afterwards.
; NOTE(review): this function is not marked nounwind, which is presumably why
; the i686 checks contain .cfi directives.
define i64 @func5(i64 %x, i64 %y) {
|
||||
; X64-LABEL: func5:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: imulq %rsi, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func5:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %esi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: addl %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: .cfi_def_cfa_offset 4
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 0);
|
||||
ret i64 %tmp;
|
||||
}
|
||||
|
||||
; Scalar i4 multiplication with a scale of 0.
; Both runs sign-extend the 4-bit operands inside a byte register
; (shlb $4 / sarb $4) and then use a single mulb, with no shift afterwards.
define i4 @func6(i4 %x, i4 %y) nounwind {
|
||||
; X64-LABEL: func6:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shlb $4, %al
|
||||
; X64-NEXT: sarb $4, %al
|
||||
; X64-NEXT: shlb $4, %sil
|
||||
; X64-NEXT: sarb $4, %sil
|
||||
; X64-NEXT: # kill: def $al killed $al killed $eax
|
||||
; X64-NEXT: mulb %sil
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func6:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: shlb $4, %al
|
||||
; X86-NEXT: sarb $4, %al
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X86-NEXT: shlb $4, %cl
|
||||
; X86-NEXT: sarb $4, %cl
|
||||
; X86-NEXT: mulb %cl
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 0);
|
||||
ret i4 %tmp;
|
||||
}
|
||||
|
||||
; <4 x i32> multiplication with a scale of 0.
; The expected output multiplies each of the four lanes with its own scalar
; two-operand imull and then reassembles the vector; no shifts are expected.
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
|
||||
; X64-LABEL: vec2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
|
||||
; X64-NEXT: movd %xmm2, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
|
||||
; X64-NEXT: movd %xmm2, %ecx
|
||||
; X64-NEXT: imull %eax, %ecx
|
||||
; X64-NEXT: movd %ecx, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
|
||||
; X64-NEXT: movd %xmm3, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: movd %xmm3, %ecx
|
||||
; X64-NEXT: imull %eax, %ecx
|
||||
; X64-NEXT: movd %ecx, %xmm3
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
|
||||
; X64-NEXT: movd %xmm1, %eax
|
||||
; X64-NEXT: movd %xmm0, %ecx
|
||||
; X64-NEXT: imull %eax, %ecx
|
||||
; X64-NEXT: movd %ecx, %xmm2
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: movd %xmm1, %eax
|
||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X64-NEXT: movd %xmm0, %ecx
|
||||
; X64-NEXT: imull %eax, %ecx
|
||||
; X64-NEXT: movd %ecx, %xmm0
|
||||
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
|
||||
; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
|
||||
; X64-NEXT: movdqa %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: vec2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-NEXT: movl %edx, 8(%eax)
|
||||
; X86-NEXT: movl %esi, 4(%eax)
|
||||
; X86-NEXT: movl %edi, (%eax)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: retl $4
|
||||
%tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0);
|
||||
ret <4 x i32> %tmp;
|
||||
}
|
||||
|
||||
; Scalar i64 multiplication with a scale of 32, i.e. half the operand width.
; The x86-64 run is a one-operand imulq followed by shrdq $32. The i686
; checks contain no double-shift instructions at all; the result is taken
; from the 32-bit partial products directly.
define i64 @func7(i64 %x, i64 %y) nounwind {
|
||||
; X64-LABEL: func7:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: imulq %rsi
|
||||
; X64-NEXT: shrdq $32, %rdx, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func7:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %ebp
|
||||
; X86-NEXT: addl %edx, %ebx
|
||||
; X86-NEXT: adcl $0, %edi
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: mull %ebp
|
||||
; X86-NEXT: addl %ebx, %eax
|
||||
; X86-NEXT: adcl %edi, %edx
|
||||
; X86-NEXT: movl %esi, %edi
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: addl %edx, %edi
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: subl %ebp, %ebx
|
||||
; X86-NEXT: testl %esi, %esi
|
||||
; X86-NEXT: cmovnsl %edi, %ebx
|
||||
; X86-NEXT: movl %ebx, %edx
|
||||
; X86-NEXT: subl %ecx, %edx
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: cmovnsl %ebx, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 32);
|
||||
ret i64 %tmp;
|
||||
}
|
||||
|
||||
; Scalar i64 multiplication with a scale of 63, one less than the operand
; width.
; The x86-64 run is a one-operand imulq followed by shrdq $63. The i686 run
; builds a wider product from 32-bit multiplies and finishes with
; shldl $1 / shrdl $31.
define i64 @func8(i64 %x, i64 %y) nounwind {
|
||||
; X64-LABEL: func8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: imulq %rsi
|
||||
; X64-NEXT: shrdq $63, %rdx, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: func8:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %edi
|
||||
; X86-NEXT: movl %eax, %ebx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %ebp
|
||||
; X86-NEXT: addl %ebx, %ebp
|
||||
; X86-NEXT: adcl $0, %edi
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: imull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movl %edx, %ebx
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: addl %ebp, %eax
|
||||
; X86-NEXT: adcl %edi, %edx
|
||||
; X86-NEXT: adcl $0, %ebx
|
||||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: adcl $0, %ebx
|
||||
; X86-NEXT: movl %edx, %ecx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ebx, %esi
|
||||
; X86-NEXT: sbbl $0, %esi
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: cmovnsl %ebx, %esi
|
||||
; X86-NEXT: cmovnsl %edx, %ecx
|
||||
; X86-NEXT: movl %ecx, %edi
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %esi, %edx
|
||||
; X86-NEXT: sbbl $0, %edx
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: cmovnsl %esi, %edx
|
||||
; X86-NEXT: cmovnsl %ecx, %edi
|
||||
; X86-NEXT: shldl $1, %edi, %edx
|
||||
; X86-NEXT: shrdl $31, %edi, %eax
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 63);
|
||||
ret i64 %tmp;
|
||||
}
|
Loading…
Reference in New Issue