forked from OSchip/llvm-project
[SelectionDAG] Better legalization for FSHL and FSHR
In SelectionDAGBuilder always translate the fshl and fshr intrinsics to FSHL and FSHR (or ROTL and ROTR) instead of lowering them to shifts and ORs. Improve the legalization of FSHL and FSHR to avoid code quality regressions. Differential Revision: https://reviews.llvm.org/D77152
This commit is contained in:
parent
c6863a4ab8
commit
0819a6416f
|
@ -207,6 +207,16 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::FREEZE:
|
case ISD::FREEZE:
|
||||||
Res = PromoteIntRes_FREEZE(N);
|
Res = PromoteIntRes_FREEZE(N);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ISD::ROTL:
|
||||||
|
case ISD::ROTR:
|
||||||
|
Res = PromoteIntRes_Rotate(N);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ISD::FSHL:
|
||||||
|
case ISD::FSHR:
|
||||||
|
Res = PromoteIntRes_FunnelShift(N);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the result is null then the sub-method took care of registering it.
|
// If the result is null then the sub-method took care of registering it.
|
||||||
|
@ -1105,6 +1115,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
|
||||||
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
|
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
|
||||||
|
// Lower the rotate to shifts and ORs which can be promoted.
|
||||||
|
SDValue Res;
|
||||||
|
TLI.expandROT(N, Res, DAG);
|
||||||
|
ReplaceValueWith(SDValue(N, 0), Res);
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
|
||||||
|
SDValue Hi = GetPromotedInteger(N->getOperand(0));
|
||||||
|
SDValue Lo = GetPromotedInteger(N->getOperand(1));
|
||||||
|
SDValue Amount = GetPromotedInteger(N->getOperand(2));
|
||||||
|
|
||||||
|
unsigned OldBits = N->getOperand(0).getScalarValueSizeInBits();
|
||||||
|
unsigned NewBits = Hi.getScalarValueSizeInBits();
|
||||||
|
|
||||||
|
// Shift Lo up to occupy the upper bits of the promoted type.
|
||||||
|
SDLoc DL(N);
|
||||||
|
EVT VT = Lo.getValueType();
|
||||||
|
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo,
|
||||||
|
DAG.getConstant(NewBits - OldBits, DL, VT));
|
||||||
|
|
||||||
|
// Amount has to be interpreted modulo the old bit width.
|
||||||
|
Amount =
|
||||||
|
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
|
||||||
|
|
||||||
|
unsigned Opcode = N->getOpcode();
|
||||||
|
if (Opcode == ISD::FSHR) {
|
||||||
|
// Increase Amount to shift the result into the lower bits of the promoted
|
||||||
|
// type.
|
||||||
|
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount,
|
||||||
|
DAG.getConstant(NewBits - OldBits, DL, VT));
|
||||||
|
}
|
||||||
|
|
||||||
|
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
|
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
|
||||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||||
SDValue Res;
|
SDValue Res;
|
||||||
|
@ -2059,6 +2106,16 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::VECREDUCE_SMIN:
|
case ISD::VECREDUCE_SMIN:
|
||||||
case ISD::VECREDUCE_UMAX:
|
case ISD::VECREDUCE_UMAX:
|
||||||
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
|
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
|
||||||
|
|
||||||
|
case ISD::ROTL:
|
||||||
|
case ISD::ROTR:
|
||||||
|
ExpandIntRes_Rotate(N, Lo, Hi);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ISD::FSHL:
|
||||||
|
case ISD::FSHR:
|
||||||
|
ExpandIntRes_FunnelShift(N, Lo, Hi);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If Lo/Hi is null, the sub-method took care of registering results etc.
|
// If Lo/Hi is null, the sub-method took care of registering results etc.
|
||||||
|
@ -3895,6 +3952,22 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
|
||||||
SplitInteger(Res, Lo, Hi);
|
SplitInteger(Res, Lo, Hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
|
||||||
|
SDValue &Lo, SDValue &Hi) {
|
||||||
|
// Lower the rotate to shifts and ORs which can be expanded.
|
||||||
|
SDValue Res;
|
||||||
|
TLI.expandROT(N, Res, DAG);
|
||||||
|
SplitInteger(Res, Lo, Hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
|
||||||
|
SDValue &Lo, SDValue &Hi) {
|
||||||
|
// Lower the funnel shift to shifts and ORs which can be expanded.
|
||||||
|
SDValue Res;
|
||||||
|
TLI.expandFunnelShift(N, Res, DAG);
|
||||||
|
SplitInteger(Res, Lo, Hi);
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Integer Operand Expansion
|
// Integer Operand Expansion
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -347,6 +347,8 @@ private:
|
||||||
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
|
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
|
||||||
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
|
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
|
||||||
SDValue PromoteIntRes_ABS(SDNode *N);
|
SDValue PromoteIntRes_ABS(SDNode *N);
|
||||||
|
SDValue PromoteIntRes_Rotate(SDNode *N);
|
||||||
|
SDValue PromoteIntRes_FunnelShift(SDNode *N);
|
||||||
|
|
||||||
// Integer Operand Promotion.
|
// Integer Operand Promotion.
|
||||||
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
|
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
|
||||||
|
@ -449,6 +451,9 @@ private:
|
||||||
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
|
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||||
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
|
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||||
|
|
||||||
|
void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||||
|
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||||
|
|
||||||
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
|
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
|
||||||
SDValue &Lo, SDValue &Hi);
|
SDValue &Lo, SDValue &Hi);
|
||||||
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
|
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||||
|
|
|
@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
|
||||||
R = ScalarizeVecRes_BinOp(N);
|
R = ScalarizeVecRes_BinOp(N);
|
||||||
break;
|
break;
|
||||||
case ISD::FMA:
|
case ISD::FMA:
|
||||||
|
case ISD::FSHL:
|
||||||
|
case ISD::FSHR:
|
||||||
R = ScalarizeVecRes_TernaryOp(N);
|
R = ScalarizeVecRes_TernaryOp(N);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -946,9 +948,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::USUBSAT:
|
case ISD::USUBSAT:
|
||||||
case ISD::SSHLSAT:
|
case ISD::SSHLSAT:
|
||||||
case ISD::USHLSAT:
|
case ISD::USHLSAT:
|
||||||
|
case ISD::ROTL:
|
||||||
|
case ISD::ROTR:
|
||||||
SplitVecRes_BinOp(N, Lo, Hi);
|
SplitVecRes_BinOp(N, Lo, Hi);
|
||||||
break;
|
break;
|
||||||
case ISD::FMA:
|
case ISD::FMA:
|
||||||
|
case ISD::FSHL:
|
||||||
|
case ISD::FSHR:
|
||||||
SplitVecRes_TernaryOp(N, Lo, Hi);
|
SplitVecRes_TernaryOp(N, Lo, Hi);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -2926,6 +2932,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
|
||||||
Res = WidenVecRes_Unary(N);
|
Res = WidenVecRes_Unary(N);
|
||||||
break;
|
break;
|
||||||
case ISD::FMA:
|
case ISD::FMA:
|
||||||
|
case ISD::FSHL:
|
||||||
|
case ISD::FSHR:
|
||||||
Res = WidenVecRes_Ternary(N);
|
Res = WidenVecRes_Ternary(N);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6252,62 +6252,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||||
SDValue Y = getValue(I.getArgOperand(1));
|
SDValue Y = getValue(I.getArgOperand(1));
|
||||||
SDValue Z = getValue(I.getArgOperand(2));
|
SDValue Z = getValue(I.getArgOperand(2));
|
||||||
EVT VT = X.getValueType();
|
EVT VT = X.getValueType();
|
||||||
SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
|
|
||||||
SDValue Zero = DAG.getConstant(0, sdl, VT);
|
|
||||||
SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
|
|
||||||
|
|
||||||
// When X == Y, this is rotate. If the data type has a power-of-2 size, we
|
if (X == Y) {
|
||||||
// avoid the select that is necessary in the general case to filter out
|
|
||||||
// the 0-shift possibility that leads to UB.
|
|
||||||
if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
|
|
||||||
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
|
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
|
||||||
if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
|
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
|
||||||
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
|
} else {
|
||||||
return;
|
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
|
||||||
}
|
|
||||||
|
|
||||||
// Some targets only rotate one way. Try the opposite direction.
|
|
||||||
RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
|
|
||||||
if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
|
|
||||||
// Negate the shift amount because it is safe to ignore the high bits.
|
|
||||||
SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
|
|
||||||
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
|
|
||||||
// fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
|
|
||||||
SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
|
|
||||||
SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
|
|
||||||
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
|
|
||||||
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
|
|
||||||
setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
|
|
||||||
if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
|
|
||||||
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
|
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
|
|
||||||
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
|
|
||||||
SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
|
|
||||||
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
|
|
||||||
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
|
|
||||||
SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
|
|
||||||
|
|
||||||
// If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
|
|
||||||
// and that is undefined. We must compare and select to avoid UB.
|
|
||||||
EVT CCVT = MVT::i1;
|
|
||||||
if (VT.isVector())
|
|
||||||
CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
|
|
||||||
|
|
||||||
// For fshl, 0-shift returns the 1st arg (X).
|
|
||||||
// For fshr, 0-shift returns the 2nd arg (Y).
|
|
||||||
SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
|
|
||||||
setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case Intrinsic::sadd_sat: {
|
case Intrinsic::sadd_sat: {
|
||||||
|
|
|
@ -6156,6 +6156,18 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
|
||||||
|
|
||||||
EVT ShVT = Z.getValueType();
|
EVT ShVT = Z.getValueType();
|
||||||
|
|
||||||
|
assert(isPowerOf2_32(BW) && "Expecting the type bitwidth to be a power of 2");
|
||||||
|
|
||||||
|
// If a funnel shift in the other direction is more supported, use it.
|
||||||
|
unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
|
||||||
|
if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
|
||||||
|
isOperationLegalOrCustom(RevOpcode, VT)) {
|
||||||
|
SDValue Zero = DAG.getConstant(0, DL, ShVT);
|
||||||
|
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Z);
|
||||||
|
Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Sub);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
SDValue ShX, ShY;
|
SDValue ShX, ShY;
|
||||||
SDValue ShAmt, InvShAmt;
|
SDValue ShAmt, InvShAmt;
|
||||||
if (isNonZeroModBitWidth(Z, BW)) {
|
if (isNonZeroModBitWidth(Z, BW)) {
|
||||||
|
|
|
@ -124,25 +124,37 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
|
||||||
case ISD::SRL: {
|
case ISD::SRL: {
|
||||||
if (!Subtarget->is64Bit())
|
if (!Subtarget->is64Bit())
|
||||||
break;
|
break;
|
||||||
SDValue Op0 = Node->getOperand(0);
|
SDNode *Op0 = Node->getOperand(0).getNode();
|
||||||
SDValue Op1 = Node->getOperand(1);
|
|
||||||
uint64_t Mask;
|
uint64_t Mask;
|
||||||
// Match (srl (and val, mask), imm) where the result would be a
|
// Match (srl (and val, mask), imm) where the result would be a
|
||||||
// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
|
// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
|
||||||
// is equivalent to this (SimplifyDemandedBits may have removed lower bits
|
// is equivalent to this (SimplifyDemandedBits may have removed lower bits
|
||||||
// from the mask that aren't necessary due to the right-shifting).
|
// from the mask that aren't necessary due to the right-shifting).
|
||||||
if (Op1.getOpcode() == ISD::Constant &&
|
if (isa<ConstantSDNode>(Node->getOperand(1)) && isConstantMask(Op0, Mask)) {
|
||||||
isConstantMask(Op0.getNode(), Mask)) {
|
uint64_t ShAmt = Node->getConstantOperandVal(1);
|
||||||
uint64_t ShAmt = cast<ConstantSDNode>(Op1.getNode())->getZExtValue();
|
|
||||||
|
|
||||||
if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
|
if ((Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff) {
|
||||||
SDValue ShAmtVal =
|
SDValue ShAmtVal =
|
||||||
CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
|
CurDAG->getTargetConstant(ShAmt, SDLoc(Node), XLenVT);
|
||||||
CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0.getOperand(0),
|
CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0->getOperand(0),
|
||||||
ShAmtVal);
|
ShAmtVal);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Match (srl (shl val, 32), imm).
|
||||||
|
if (Op0->getOpcode() == ISD::SHL &&
|
||||||
|
isa<ConstantSDNode>(Op0->getOperand(1)) &&
|
||||||
|
isa<ConstantSDNode>(Node->getOperand(1))) {
|
||||||
|
uint64_t ShlAmt = Op0->getConstantOperandVal(1);
|
||||||
|
uint64_t SrlAmt = Node->getConstantOperandVal(1);
|
||||||
|
if (ShlAmt == 32 && SrlAmt > 32) {
|
||||||
|
SDValue SrlAmtSub32Val =
|
||||||
|
CurDAG->getTargetConstant(SrlAmt - 32, SDLoc(Node), XLenVT);
|
||||||
|
CurDAG->SelectNodeTo(Node, RISCV::SRLIW, XLenVT, Op0->getOperand(0),
|
||||||
|
SrlAmtSub32Val);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case RISCVISD::READ_CYCLE_WIDE:
|
case RISCVISD::READ_CYCLE_WIDE:
|
||||||
|
@ -459,55 +471,6 @@ bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
|
|
||||||
// We first check that it is the right node tree:
|
|
||||||
//
|
|
||||||
// (SIGN_EXTEND_INREG (OR (SHL (AsserSext RS1, i32), VC2),
|
|
||||||
// (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
|
|
||||||
//
|
|
||||||
// Then we check that the constant operands respect these constraints:
|
|
||||||
//
|
|
||||||
// VC2 == 32 - VC1
|
|
||||||
// VC3 == maskLeadingOnes<uint32_t>(VC2)
|
|
||||||
//
|
|
||||||
// being VC1 the Shamt we need, VC2 the complementary of Shamt over 32
|
|
||||||
// and VC3 a 32 bit mask of (32 - VC1) leading ones.
|
|
||||||
|
|
||||||
bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
|
|
||||||
SDValue &Shamt) {
|
|
||||||
if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
|
|
||||||
Subtarget->getXLenVT() == MVT::i64 &&
|
|
||||||
cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
|
|
||||||
if (N.getOperand(0).getOpcode() == ISD::OR) {
|
|
||||||
SDValue Or = N.getOperand(0);
|
|
||||||
if (Or.getOperand(0).getOpcode() == ISD::SHL &&
|
|
||||||
Or.getOperand(1).getOpcode() == ISD::SRL) {
|
|
||||||
SDValue Shl = Or.getOperand(0);
|
|
||||||
SDValue Srl = Or.getOperand(1);
|
|
||||||
if (Srl.getOperand(0).getOpcode() == ISD::AND) {
|
|
||||||
SDValue And = Srl.getOperand(0);
|
|
||||||
if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
|
|
||||||
isa<ConstantSDNode>(Shl.getOperand(1)) &&
|
|
||||||
isa<ConstantSDNode>(And.getOperand(1))) {
|
|
||||||
uint32_t VC1 = Srl.getConstantOperandVal(1);
|
|
||||||
uint32_t VC2 = Shl.getConstantOperandVal(1);
|
|
||||||
uint32_t VC3 = And.getConstantOperandVal(1);
|
|
||||||
if (VC2 == (32 - VC1) &&
|
|
||||||
VC3 == maskLeadingOnes<uint32_t>(VC2)) {
|
|
||||||
RS1 = Shl.getOperand(0);
|
|
||||||
RS2 = And.getOperand(0);
|
|
||||||
Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
|
|
||||||
Srl.getOperand(1).getValueType());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge an ADDI into the offset of a load/store instruction where possible.
|
// Merge an ADDI into the offset of a load/store instruction where possible.
|
||||||
// (load (addi base, off1), off2) -> (load base, off1+off2)
|
// (load (addi base, off1), off2) -> (load base, off1+off2)
|
||||||
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
|
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
|
||||||
|
|
|
@ -52,7 +52,6 @@ public:
|
||||||
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
||||||
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
||||||
bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
|
||||||
bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt);
|
|
||||||
|
|
||||||
// Include the pieces autogenerated from the target description.
|
// Include the pieces autogenerated from the target description.
|
||||||
#include "RISCVGenDAGISel.inc"
|
#include "RISCVGenDAGISel.inc"
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Operand definitions.
|
// Operand and SDNode transformation definitions.
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
|
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
|
||||||
|
@ -40,6 +40,12 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return an immediate value minus 32.
|
||||||
|
def ImmSub32 : SDNodeXForm<imm, [{
|
||||||
|
return CurDAG->getTargetConstant(N->getSExtValue() - 32, SDLoc(N),
|
||||||
|
N->getValueType(0));
|
||||||
|
}]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Instruction class templates
|
// Instruction class templates
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -643,7 +649,6 @@ def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
|
||||||
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
|
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
|
||||||
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
|
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
|
||||||
def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
|
def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
|
||||||
def FSRIWPat : ComplexPattern<i64, 3, "SelectFSRIW", [sext_inreg]>;
|
|
||||||
|
|
||||||
let Predicates = [HasStdExtZbbOrZbp] in {
|
let Predicates = [HasStdExtZbbOrZbp] in {
|
||||||
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
|
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
|
||||||
|
@ -1019,28 +1024,21 @@ def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
|
||||||
} // Predicates = [HasStdExtZbp, IsRV64]
|
} // Predicates = [HasStdExtZbp, IsRV64]
|
||||||
|
|
||||||
let Predicates = [HasStdExtZbt, IsRV64] in {
|
let Predicates = [HasStdExtZbt, IsRV64] in {
|
||||||
def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
|
def : Pat<(sext_inreg (fshl (assertsexti32 GPR:$rs1),
|
||||||
(i64 0),
|
(shl (assertsexti32 GPR:$rs2), (i64 32)),
|
||||||
(i64 17),
|
(and (assertsexti32 GPR:$rs3), (i64 31))),
|
||||||
(assertsexti32 GPR:$rs1),
|
i32),
|
||||||
(or (riscv_sllw (assertsexti32 GPR:$rs1),
|
|
||||||
(and (assertsexti32 GPR:$rs3), 31)),
|
|
||||||
(riscv_srlw (assertsexti32 GPR:$rs2),
|
|
||||||
(sub (i64 32),
|
|
||||||
(assertsexti32 GPR:$rs3))))),
|
|
||||||
(FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
|
(FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
|
||||||
def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
|
def : Pat<(sext_inreg (fshr (assertsexti32 GPR:$rs1),
|
||||||
(i64 0),
|
(shl (assertsexti32 GPR:$rs2), (i64 32)),
|
||||||
(i64 17),
|
(or (assertsexti32 GPR:$rs3), (i64 32))),
|
||||||
(assertsexti32 GPR:$rs2),
|
i32),
|
||||||
(or (riscv_sllw (assertsexti32 GPR:$rs1),
|
|
||||||
(sub (i64 32),
|
|
||||||
(assertsexti32 GPR:$rs3))),
|
|
||||||
(riscv_srlw (assertsexti32 GPR:$rs2),
|
|
||||||
(and (assertsexti32 GPR:$rs3), 31)))),
|
|
||||||
(FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
|
(FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
|
||||||
def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
|
def : Pat<(sext_inreg (fshr (assertsexti32 GPR:$rs1),
|
||||||
(FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
|
(shl (assertsexti32 GPR:$rs2), (i64 32)),
|
||||||
|
uimmlog2xlen:$shamt),
|
||||||
|
i32),
|
||||||
|
(FSRIW GPR:$rs1, GPR:$rs2, (ImmSub32 uimm5:$shamt))>;
|
||||||
} // Predicates = [HasStdExtZbt, IsRV64]
|
} // Predicates = [HasStdExtZbt, IsRV64]
|
||||||
|
|
||||||
let Predicates = [HasStdExtZbb, IsRV64] in {
|
let Predicates = [HasStdExtZbb, IsRV64] in {
|
||||||
|
|
|
@ -110,8 +110,8 @@ define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
|
||||||
define i8 @rotr_i8_const_shift(i8 %x) {
|
define i8 @rotr_i8_const_shift(i8 %x) {
|
||||||
; CHECK-LABEL: rotr_i8_const_shift:
|
; CHECK-LABEL: rotr_i8_const_shift:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ubfx w8, w0, #3, #5
|
; CHECK-NEXT: lsl w8, w0, #5
|
||||||
; CHECK-NEXT: bfi w8, w0, #5, #27
|
; CHECK-NEXT: bfxil w8, w0, #3, #5
|
||||||
; CHECK-NEXT: mov w0, w8
|
; CHECK-NEXT: mov w0, w8
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
|
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
|
||||||
|
@ -138,7 +138,7 @@ define i16 @rotr_i16(i16 %x, i16 %z) {
|
||||||
; CHECK-NEXT: lsr w8, w8, w9
|
; CHECK-NEXT: lsr w8, w8, w9
|
||||||
; CHECK-NEXT: and w9, w10, #0xf
|
; CHECK-NEXT: and w9, w10, #0xf
|
||||||
; CHECK-NEXT: lsl w9, w0, w9
|
; CHECK-NEXT: lsl w9, w0, w9
|
||||||
; CHECK-NEXT: orr w0, w9, w8
|
; CHECK-NEXT: orr w0, w8, w9
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
|
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
|
||||||
ret i16 %f
|
ret i16 %f
|
||||||
|
@ -167,14 +167,14 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
|
||||||
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
|
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
|
||||||
; CHECK-LABEL: rotr_v4i32:
|
; CHECK-LABEL: rotr_v4i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: movi v2.4s, #31
|
; CHECK-NEXT: movi v3.4s, #31
|
||||||
; CHECK-NEXT: neg v3.4s, v1.4s
|
; CHECK-NEXT: neg v2.4s, v1.4s
|
||||||
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
|
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
|
||||||
; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
|
; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
|
||||||
; CHECK-NEXT: neg v1.4s, v1.4s
|
; CHECK-NEXT: neg v1.4s, v1.4s
|
||||||
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
|
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
|
||||||
; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s
|
; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
|
||||||
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
|
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
|
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
|
||||||
ret <4 x i32> %f
|
ret <4 x i32> %f
|
||||||
|
@ -185,8 +185,8 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
|
||||||
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
|
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
|
||||||
; CHECK-LABEL: rotr_v4i32_const_shift:
|
; CHECK-LABEL: rotr_v4i32_const_shift:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ushr v1.4s, v0.4s, #3
|
; CHECK-NEXT: shl v1.4s, v0.4s, #29
|
||||||
; CHECK-NEXT: shl v0.4s, v0.4s, #29
|
; CHECK-NEXT: ushr v0.4s, v0.4s, #3
|
||||||
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
|
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
|
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
|
||||||
|
|
|
@ -18,12 +18,12 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||||
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
|
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
|
||||||
; CHECK-LABEL: fshl_i32:
|
; CHECK-LABEL: fshl_i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||||
; CHECK-NEXT: neg w9, w9
|
; CHECK-NEXT: mvn w9, w2
|
||||||
|
; CHECK-NEXT: lsr w10, w1, #1
|
||||||
; CHECK-NEXT: lsl w8, w0, w2
|
; CHECK-NEXT: lsl w8, w0, w2
|
||||||
; CHECK-NEXT: lsr w9, w1, w9
|
; CHECK-NEXT: lsr w9, w10, w9
|
||||||
; CHECK-NEXT: orr w8, w8, w9
|
; CHECK-NEXT: orr w0, w8, w9
|
||||||
; CHECK-NEXT: csel w0, w0, w8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
|
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
|
||||||
ret i32 %f
|
ret i32 %f
|
||||||
|
@ -34,22 +34,19 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
|
||||||
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||||
; CHECK-LABEL: fshl_i37:
|
; CHECK-LABEL: fshl_i37:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x10, #31883
|
; CHECK-NEXT: mov x8, #31883
|
||||||
; CHECK-NEXT: movk x10, #3542, lsl #16
|
; CHECK-NEXT: movk x8, #3542, lsl #16
|
||||||
; CHECK-NEXT: movk x10, #51366, lsl #32
|
; CHECK-NEXT: movk x8, #51366, lsl #32
|
||||||
; CHECK-NEXT: and x9, x2, #0x1fffffffff
|
; CHECK-NEXT: movk x8, #56679, lsl #48
|
||||||
; CHECK-NEXT: movk x10, #56679, lsl #48
|
; CHECK-NEXT: umulh x8, x2, x8
|
||||||
; CHECK-NEXT: umulh x10, x9, x10
|
; CHECK-NEXT: mov w9, #37
|
||||||
; CHECK-NEXT: mov w11, #37
|
; CHECK-NEXT: ubfx x8, x8, #5, #27
|
||||||
; CHECK-NEXT: lsr x10, x10, #5
|
; CHECK-NEXT: msub w8, w8, w9, w2
|
||||||
; CHECK-NEXT: msub x9, x10, x11, x9
|
; CHECK-NEXT: lsl x9, x0, x8
|
||||||
; CHECK-NEXT: and x8, x1, #0x1fffffffff
|
; CHECK-NEXT: mvn w8, w8
|
||||||
; CHECK-NEXT: sub x11, x11, x9
|
; CHECK-NEXT: ubfiz x10, x1, #26, #37
|
||||||
; CHECK-NEXT: lsl x10, x0, x9
|
; CHECK-NEXT: lsr x8, x10, x8
|
||||||
; CHECK-NEXT: lsr x8, x8, x11
|
; CHECK-NEXT: orr x0, x9, x8
|
||||||
; CHECK-NEXT: orr x8, x10, x8
|
|
||||||
; CHECK-NEXT: cmp x9, #0 // =0
|
|
||||||
; CHECK-NEXT: csel x0, x0, x8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
|
%f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
|
||||||
ret i37 %f
|
ret i37 %f
|
||||||
|
@ -145,12 +142,12 @@ define i8 @fshl_i8_const_fold() {
|
||||||
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
|
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
|
||||||
; CHECK-LABEL: fshr_i32:
|
; CHECK-LABEL: fshr_i32:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||||
; CHECK-NEXT: neg w9, w9
|
; CHECK-NEXT: mvn w9, w2
|
||||||
|
; CHECK-NEXT: lsl w10, w0, #1
|
||||||
; CHECK-NEXT: lsr w8, w1, w2
|
; CHECK-NEXT: lsr w8, w1, w2
|
||||||
; CHECK-NEXT: lsl w9, w0, w9
|
; CHECK-NEXT: lsl w9, w10, w9
|
||||||
; CHECK-NEXT: orr w8, w9, w8
|
; CHECK-NEXT: orr w0, w9, w8
|
||||||
; CHECK-NEXT: csel w0, w1, w8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
|
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
|
||||||
ret i32 %f
|
ret i32 %f
|
||||||
|
@ -161,22 +158,21 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
|
||||||
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||||
; CHECK-LABEL: fshr_i37:
|
; CHECK-LABEL: fshr_i37:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: mov x10, #31883
|
; CHECK-NEXT: mov x8, #31883
|
||||||
; CHECK-NEXT: movk x10, #3542, lsl #16
|
; CHECK-NEXT: movk x8, #3542, lsl #16
|
||||||
; CHECK-NEXT: movk x10, #51366, lsl #32
|
; CHECK-NEXT: movk x8, #51366, lsl #32
|
||||||
; CHECK-NEXT: and x9, x2, #0x1fffffffff
|
; CHECK-NEXT: movk x8, #56679, lsl #48
|
||||||
; CHECK-NEXT: movk x10, #56679, lsl #48
|
; CHECK-NEXT: umulh x8, x2, x8
|
||||||
; CHECK-NEXT: umulh x10, x9, x10
|
; CHECK-NEXT: mov w9, #37
|
||||||
; CHECK-NEXT: mov w11, #37
|
; CHECK-NEXT: lsr x8, x8, #5
|
||||||
; CHECK-NEXT: lsr x10, x10, #5
|
; CHECK-NEXT: msub w8, w8, w9, w2
|
||||||
; CHECK-NEXT: msub x9, x10, x11, x9
|
; CHECK-NEXT: lsl x10, x1, #27
|
||||||
; CHECK-NEXT: and x8, x1, #0x1fffffffff
|
; CHECK-NEXT: add w8, w8, #27 // =27
|
||||||
; CHECK-NEXT: sub x10, x11, x9
|
; CHECK-NEXT: lsr x9, x10, x8
|
||||||
; CHECK-NEXT: lsr x8, x8, x9
|
; CHECK-NEXT: mvn w8, w8
|
||||||
; CHECK-NEXT: lsl x10, x0, x10
|
; CHECK-NEXT: lsl x10, x0, #1
|
||||||
; CHECK-NEXT: orr x8, x10, x8
|
; CHECK-NEXT: lsl x8, x10, x8
|
||||||
; CHECK-NEXT: cmp x9, #0 // =0
|
; CHECK-NEXT: orr x0, x8, x9
|
||||||
; CHECK-NEXT: csel x0, x1, x8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
|
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
|
||||||
ret i37 %f
|
ret i37 %f
|
||||||
|
|
|
@ -80,12 +80,12 @@ declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
|
||||||
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
|
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||||
; CHECK-LABEL: n6_fshl:
|
; CHECK-LABEL: n6_fshl:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||||
; CHECK-NEXT: neg w9, w9
|
; CHECK-NEXT: mvn w9, w2
|
||||||
|
; CHECK-NEXT: lsr w10, w1, #1
|
||||||
; CHECK-NEXT: lsl w8, w0, w2
|
; CHECK-NEXT: lsl w8, w0, w2
|
||||||
; CHECK-NEXT: lsr w9, w1, w9
|
; CHECK-NEXT: lsr w9, w10, w9
|
||||||
; CHECK-NEXT: orr w8, w8, w9
|
; CHECK-NEXT: orr w0, w8, w9
|
||||||
; CHECK-NEXT: csel w0, w0, w8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%shamt_wide = sext i8 %shamt to i32
|
%shamt_wide = sext i8 %shamt to i32
|
||||||
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %shamt_wide)
|
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %shamt_wide)
|
||||||
|
@ -94,12 +94,12 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||||
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
|
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||||
; CHECK-LABEL: n7_fshr:
|
; CHECK-LABEL: n7_fshr:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||||
; CHECK-NEXT: neg w9, w9
|
; CHECK-NEXT: mvn w9, w2
|
||||||
|
; CHECK-NEXT: lsl w10, w0, #1
|
||||||
; CHECK-NEXT: lsr w8, w1, w2
|
; CHECK-NEXT: lsr w8, w1, w2
|
||||||
; CHECK-NEXT: lsl w9, w0, w9
|
; CHECK-NEXT: lsl w9, w10, w9
|
||||||
; CHECK-NEXT: orr w8, w9, w8
|
; CHECK-NEXT: orr w0, w9, w8
|
||||||
; CHECK-NEXT: csel w0, w1, w8, eq
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%shamt_wide = sext i8 %shamt to i32
|
%shamt_wide = sext i8 %shamt to i32
|
||||||
%r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %shamt_wide)
|
%r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %shamt_wide)
|
||||||
|
|
|
@ -16,14 +16,10 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
|
||||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||||
; SI-NEXT: s_mov_b32 s6, -1
|
; SI-NEXT: s_mov_b32 s6, -1
|
||||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; SI-NEXT: s_sub_i32 s3, 32, s2
|
; SI-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s1
|
; SI-NEXT: v_mov_b32_e32 v0, s1
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s3
|
; SI-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; SI-NEXT: s_and_b32 s1, s2, 31
|
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1
|
; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s0
|
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
|
|
||||||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||||
; SI-NEXT: s_endpgm
|
; SI-NEXT: s_endpgm
|
||||||
;
|
;
|
||||||
|
@ -32,15 +28,10 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
|
||||||
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
||||||
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
|
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
|
||||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; VI-NEXT: s_sub_i32 s3, 32, s2
|
; VI-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s1
|
; VI-NEXT: v_mov_b32_e32 v0, s1
|
||||||
; VI-NEXT: s_and_b32 s1, s2, 31
|
; VI-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s3
|
; VI-NEXT: v_alignbit_b32 v2, s0, v0, v1
|
||||||
; VI-NEXT: s_cmp_eq_u32 s1, 0
|
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s0, v0, v1
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s0
|
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s4
|
; VI-NEXT: v_mov_b32_e32 v0, s4
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s5
|
; VI-NEXT: v_mov_b32_e32 v1, s5
|
||||||
; VI-NEXT: flat_store_dword v[0:1], v2
|
; VI-NEXT: flat_store_dword v[0:1], v2
|
||||||
|
@ -51,15 +42,10 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
|
||||||
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
|
||||||
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
|
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
|
||||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; GFX9-NEXT: s_sub_i32 s3, 32, s2
|
; GFX9-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s1
|
; GFX9-NEXT: v_mov_b32_e32 v0, s1
|
||||||
; GFX9-NEXT: s_and_b32 s1, s2, 31
|
; GFX9-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, v1
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s1, 0
|
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s0
|
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
||||||
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
; GFX9-NEXT: global_store_dword v[0:1], v2, off
|
||||||
|
@ -67,17 +53,13 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
|
||||||
;
|
;
|
||||||
; R600-LABEL: fshl_i32:
|
; R600-LABEL: fshl_i32:
|
||||||
; R600: ; %bb.0: ; %entry
|
; R600: ; %bb.0: ; %entry
|
||||||
; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
|
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
|
||||||
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
||||||
; R600-NEXT: CF_END
|
; R600-NEXT: CF_END
|
||||||
; R600-NEXT: PAD
|
; R600-NEXT: PAD
|
||||||
; R600-NEXT: ALU clause starting at 4:
|
; R600-NEXT: ALU clause starting at 4:
|
||||||
; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[3].X,
|
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[3].X,
|
||||||
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
; R600-NEXT: BIT_ALIGN_INT T0.X, KC0[2].Z, KC0[2].W, PV.W,
|
||||||
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].Z, KC0[2].W, PV.W,
|
|
||||||
; R600-NEXT: AND_INT * T1.W, KC0[3].X, literal.x,
|
|
||||||
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
|
||||||
; R600-NEXT: CNDE_INT T0.X, PS, KC0[2].Z, PV.W,
|
|
||||||
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||||
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||||
entry:
|
entry:
|
||||||
|
@ -151,21 +133,13 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
|
||||||
; SI-NEXT: s_mov_b32 s6, -1
|
; SI-NEXT: s_mov_b32 s6, -1
|
||||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s9
|
; SI-NEXT: v_mov_b32_e32 v0, s9
|
||||||
; SI-NEXT: s_sub_i32 s10, 32, s1
|
; SI-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s10
|
; SI-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; SI-NEXT: s_and_b32 s1, s1, 31
|
; SI-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s3, v0, v1
|
; SI-NEXT: v_alignbit_b32 v1, s3, v0, v1
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s3
|
|
||||||
; SI-NEXT: s_sub_i32 s1, 32, s0
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
|
||||||
; SI-NEXT: s_and_b32 s0, s0, 31
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s8
|
; SI-NEXT: v_mov_b32_e32 v0, s8
|
||||||
; SI-NEXT: v_mov_b32_e32 v2, s1
|
; SI-NEXT: v_mov_b32_e32 v2, s0
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s2, v0, v2
|
; SI-NEXT: v_alignbit_b32 v0, s2, v0, v2
|
||||||
; SI-NEXT: v_mov_b32_e32 v2, s2
|
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
|
||||||
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
|
||||||
; SI-NEXT: s_endpgm
|
; SI-NEXT: s_endpgm
|
||||||
;
|
;
|
||||||
|
@ -177,23 +151,13 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
|
||||||
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
|
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
|
||||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s7
|
; VI-NEXT: v_mov_b32_e32 v0, s7
|
||||||
; VI-NEXT: s_sub_i32 s8, 32, s1
|
; VI-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; VI-NEXT: s_and_b32 s1, s1, 31
|
; VI-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s8
|
; VI-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; VI-NEXT: s_cmp_eq_u32 s1, 0
|
; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
|
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s5
|
|
||||||
; VI-NEXT: s_sub_i32 s1, 32, s0
|
|
||||||
; VI-NEXT: s_and_b32 s0, s0, 31
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
|
||||||
; VI-NEXT: s_cmp_eq_u32 s0, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s6
|
; VI-NEXT: v_mov_b32_e32 v0, s6
|
||||||
; VI-NEXT: v_mov_b32_e32 v2, s1
|
; VI-NEXT: v_mov_b32_e32 v2, s0
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v2
|
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v2
|
||||||
; VI-NEXT: v_mov_b32_e32 v2, s4
|
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v2, s2
|
; VI-NEXT: v_mov_b32_e32 v2, s2
|
||||||
; VI-NEXT: v_mov_b32_e32 v3, s3
|
; VI-NEXT: v_mov_b32_e32 v3, s3
|
||||||
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||||
|
@ -207,23 +171,13 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
|
||||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
|
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c
|
||||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s7
|
; GFX9-NEXT: v_mov_b32_e32 v0, s7
|
||||||
; GFX9-NEXT: s_sub_i32 s8, 32, s1
|
; GFX9-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; GFX9-NEXT: s_and_b32 s1, s1, 31
|
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s8
|
; GFX9-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s1, 0
|
; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
|
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
|
||||||
; GFX9-NEXT: s_sub_i32 s1, 32, s0
|
|
||||||
; GFX9-NEXT: s_and_b32 s0, s0, 31
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s0, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s6
|
; GFX9-NEXT: v_mov_b32_e32 v0, s6
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v2, s1
|
; GFX9-NEXT: v_mov_b32_e32 v2, s0
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v2
|
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v2
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v2, s4
|
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v2, s2
|
; GFX9-NEXT: v_mov_b32_e32 v2, s2
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v3, s3
|
; GFX9-NEXT: v_mov_b32_e32 v3, s3
|
||||||
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
|
||||||
|
@ -231,24 +185,16 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
|
||||||
;
|
;
|
||||||
; R600-LABEL: fshl_v2i32:
|
; R600-LABEL: fshl_v2i32:
|
||||||
; R600: ; %bb.0: ; %entry
|
; R600: ; %bb.0: ; %entry
|
||||||
; R600-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
|
; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
|
||||||
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
|
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
|
||||||
; R600-NEXT: CF_END
|
; R600-NEXT: CF_END
|
||||||
; R600-NEXT: PAD
|
; R600-NEXT: PAD
|
||||||
; R600-NEXT: ALU clause starting at 4:
|
; R600-NEXT: ALU clause starting at 4:
|
||||||
; R600-NEXT: AND_INT T0.W, KC0[4].X, literal.x,
|
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[4].X,
|
||||||
; R600-NEXT: SUB_INT * T1.W, literal.y, KC0[4].X,
|
; R600-NEXT: BIT_ALIGN_INT T0.Y, KC0[3].X, KC0[3].Z, PV.W,
|
||||||
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
|
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[3].W,
|
||||||
; R600-NEXT: AND_INT T0.Y, KC0[3].W, literal.x,
|
; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, PV.W,
|
||||||
; R600-NEXT: SUB_INT T0.Z, literal.y, KC0[3].W,
|
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||||
; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PS,
|
|
||||||
; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0,
|
|
||||||
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
|
|
||||||
; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[3].X,
|
|
||||||
; R600-NEXT: BIT_ALIGN_INT T0.W, KC0[2].W, KC0[3].Y, PV.Z,
|
|
||||||
; R600-NEXT: SETE_INT * T1.W, PV.Y, 0.0,
|
|
||||||
; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[2].W,
|
|
||||||
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
|
|
||||||
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||||
entry:
|
entry:
|
||||||
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
|
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
|
||||||
|
@ -332,37 +278,21 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
|
||||||
; SI-NEXT: s_mov_b32 s6, -1
|
; SI-NEXT: s_mov_b32 s6, -1
|
||||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s15
|
; SI-NEXT: v_mov_b32_e32 v0, s15
|
||||||
; SI-NEXT: s_sub_i32 s16, 32, s3
|
; SI-NEXT: s_sub_i32 s3, 0, s3
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s16
|
|
||||||
; SI-NEXT: s_and_b32 s3, s3, 31
|
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s11, v0, v1
|
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s11
|
|
||||||
; SI-NEXT: s_sub_i32 s3, 32, s2
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
|
|
||||||
; SI-NEXT: s_and_b32 s2, s2, 31
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s14
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s3
|
; SI-NEXT: v_mov_b32_e32 v1, s3
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s10, v0, v1
|
; SI-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
|
; SI-NEXT: v_alignbit_b32 v3, s11, v0, v1
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s10
|
; SI-NEXT: v_mov_b32_e32 v0, s14
|
||||||
; SI-NEXT: s_sub_i32 s2, 32, s1
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
|
||||||
; SI-NEXT: s_and_b32 s1, s1, 31
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s13
|
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s2
|
; SI-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s9, v0, v1
|
; SI-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0
|
; SI-NEXT: v_alignbit_b32 v2, s10, v0, v1
|
||||||
; SI-NEXT: v_mov_b32_e32 v1, s9
|
; SI-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; SI-NEXT: s_sub_i32 s1, 32, s0
|
; SI-NEXT: v_mov_b32_e32 v0, s13
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
; SI-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; SI-NEXT: s_and_b32 s0, s0, 31
|
; SI-NEXT: v_alignbit_b32 v1, s9, v0, v1
|
||||||
; SI-NEXT: v_mov_b32_e32 v0, s12
|
; SI-NEXT: v_mov_b32_e32 v0, s12
|
||||||
; SI-NEXT: v_mov_b32_e32 v4, s1
|
; SI-NEXT: v_mov_b32_e32 v4, s0
|
||||||
; SI-NEXT: v_alignbit_b32 v0, s8, v0, v4
|
; SI-NEXT: v_alignbit_b32 v0, s8, v0, v4
|
||||||
; SI-NEXT: v_mov_b32_e32 v4, s8
|
|
||||||
; SI-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0
|
|
||||||
; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
|
|
||||||
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
|
||||||
; SI-NEXT: s_endpgm
|
; SI-NEXT: s_endpgm
|
||||||
;
|
;
|
||||||
|
@ -374,41 +304,21 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
|
||||||
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
|
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
|
||||||
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s11
|
; VI-NEXT: v_mov_b32_e32 v0, s11
|
||||||
; VI-NEXT: s_sub_i32 s14, 32, s3
|
; VI-NEXT: s_sub_i32 s3, 0, s3
|
||||||
; VI-NEXT: s_and_b32 s3, s3, 31
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s14
|
|
||||||
; VI-NEXT: s_cmp_eq_u32 s3, 0
|
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s7, v0, v1
|
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s7
|
|
||||||
; VI-NEXT: s_sub_i32 s3, 32, s2
|
|
||||||
; VI-NEXT: s_and_b32 s2, s2, 31
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
|
|
||||||
; VI-NEXT: s_cmp_eq_u32 s2, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s10
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s3
|
; VI-NEXT: v_mov_b32_e32 v1, s3
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s6, v0, v1
|
; VI-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
; VI-NEXT: v_alignbit_b32 v3, s7, v0, v1
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s6
|
; VI-NEXT: v_mov_b32_e32 v0, s10
|
||||||
; VI-NEXT: s_sub_i32 s2, 32, s1
|
|
||||||
; VI-NEXT: s_and_b32 s1, s1, 31
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
|
||||||
; VI-NEXT: s_cmp_eq_u32 s1, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s9
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s2
|
; VI-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s5, v0, v1
|
; VI-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
; VI-NEXT: v_alignbit_b32 v2, s6, v0, v1
|
||||||
; VI-NEXT: v_mov_b32_e32 v1, s5
|
; VI-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; VI-NEXT: s_sub_i32 s1, 32, s0
|
; VI-NEXT: v_mov_b32_e32 v0, s9
|
||||||
; VI-NEXT: s_and_b32 s0, s0, 31
|
; VI-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1
|
||||||
; VI-NEXT: s_cmp_eq_u32 s0, 0
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v0, s8
|
; VI-NEXT: v_mov_b32_e32 v0, s8
|
||||||
; VI-NEXT: v_mov_b32_e32 v4, s1
|
; VI-NEXT: v_mov_b32_e32 v4, s0
|
||||||
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v4
|
; VI-NEXT: v_alignbit_b32 v0, s4, v0, v4
|
||||||
; VI-NEXT: v_mov_b32_e32 v4, s4
|
|
||||||
; VI-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
|
|
||||||
; VI-NEXT: v_mov_b32_e32 v4, s12
|
; VI-NEXT: v_mov_b32_e32 v4, s12
|
||||||
; VI-NEXT: v_mov_b32_e32 v5, s13
|
; VI-NEXT: v_mov_b32_e32 v5, s13
|
||||||
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
|
||||||
|
@ -422,41 +332,21 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
|
||||||
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
|
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54
|
||||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s11
|
; GFX9-NEXT: v_mov_b32_e32 v0, s11
|
||||||
; GFX9-NEXT: s_sub_i32 s14, 32, s3
|
; GFX9-NEXT: s_sub_i32 s3, 0, s3
|
||||||
; GFX9-NEXT: s_and_b32 s3, s3, 31
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s14
|
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s3, 0
|
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s7, v0, v1
|
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s7
|
|
||||||
; GFX9-NEXT: s_sub_i32 s3, 32, s2
|
|
||||||
; GFX9-NEXT: s_and_b32 s2, s2, 31
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc
|
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s2, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s10
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
; GFX9-NEXT: v_mov_b32_e32 v1, s3
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s6, v0, v1
|
; GFX9-NEXT: s_sub_i32 s2, 0, s2
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, v1
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s6
|
; GFX9-NEXT: v_mov_b32_e32 v0, s10
|
||||||
; GFX9-NEXT: s_sub_i32 s2, 32, s1
|
|
||||||
; GFX9-NEXT: s_and_b32 s1, s1, 31
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc
|
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s1, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s9
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s2
|
; GFX9-NEXT: v_mov_b32_e32 v1, s2
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s5, v0, v1
|
; GFX9-NEXT: s_sub_i32 s1, 0, s1
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
; GFX9-NEXT: v_alignbit_b32 v2, s6, v0, v1
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
; GFX9-NEXT: s_sub_i32 s0, 0, s0
|
||||||
; GFX9-NEXT: s_sub_i32 s1, 32, s0
|
; GFX9-NEXT: v_mov_b32_e32 v0, s9
|
||||||
; GFX9-NEXT: s_and_b32 s0, s0, 31
|
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
|
; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1
|
||||||
; GFX9-NEXT: s_cmp_eq_u32 s0, 0
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v0, s8
|
; GFX9-NEXT: v_mov_b32_e32 v0, s8
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v4, s1
|
; GFX9-NEXT: v_mov_b32_e32 v4, s0
|
||||||
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v4
|
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v4
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v4, s4
|
|
||||||
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
|
|
||||||
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
|
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v4, s12
|
; GFX9-NEXT: v_mov_b32_e32 v4, s12
|
||||||
; GFX9-NEXT: v_mov_b32_e32 v5, s13
|
; GFX9-NEXT: v_mov_b32_e32 v5, s13
|
||||||
; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
|
; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
|
||||||
|
@ -464,35 +354,19 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
|
||||||
;
|
;
|
||||||
; R600-LABEL: fshl_v4i32:
|
; R600-LABEL: fshl_v4i32:
|
||||||
; R600: ; %bb.0: ; %entry
|
; R600: ; %bb.0: ; %entry
|
||||||
; R600-NEXT: ALU 25, @4, KC0[CB0:0-32], KC1[]
|
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
|
||||||
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
|
||||||
; R600-NEXT: CF_END
|
; R600-NEXT: CF_END
|
||||||
; R600-NEXT: PAD
|
; R600-NEXT: PAD
|
||||||
; R600-NEXT: ALU clause starting at 4:
|
; R600-NEXT: ALU clause starting at 4:
|
||||||
; R600-NEXT: SUB_INT * T0.W, literal.x, KC0[6].X,
|
; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[6].X,
|
||||||
; R600-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
||||||
; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W,
|
; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W,
|
||||||
; R600-NEXT: AND_INT * T1.W, KC0[6].X, literal.x,
|
; R600-NEXT: SUB_INT * T1.W, 0.0, KC0[5].W,
|
||||||
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
|
; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, PV.W,
|
||||||
; R600-NEXT: AND_INT T0.X, KC0[5].Z, literal.x,
|
; R600-NEXT: SUB_INT * T1.W, 0.0, KC0[5].Z,
|
||||||
; R600-NEXT: SUB_INT T0.Y, literal.y, KC0[5].Z,
|
; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, PV.W,
|
||||||
; R600-NEXT: SETE_INT T0.Z, PV.W, 0.0,
|
; R600-NEXT: SUB_INT * T1.W, 0.0, KC0[5].Y,
|
||||||
; R600-NEXT: SUB_INT T1.W, literal.y, KC0[5].W,
|
; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, PV.W,
|
||||||
; R600-NEXT: AND_INT * T2.W, KC0[5].W, literal.x,
|
|
||||||
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
|
|
||||||
; R600-NEXT: SETE_INT T1.Z, PS, 0.0,
|
|
||||||
; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[3].W, KC0[4].W, PV.W,
|
|
||||||
; R600-NEXT: CNDE_INT * T0.W, T0.Z, T0.W, KC0[4].X,
|
|
||||||
; R600-NEXT: CNDE_INT T0.Z, T1.Z, T1.W, KC0[3].W,
|
|
||||||
; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].Z, KC0[4].Z, T0.Y,
|
|
||||||
; R600-NEXT: SETE_INT * T2.W, T0.X, 0.0,
|
|
||||||
; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].Z,
|
|
||||||
; R600-NEXT: AND_INT T1.W, KC0[5].Y, literal.x,
|
|
||||||
; R600-NEXT: SUB_INT * T2.W, literal.y, KC0[5].Y,
|
|
||||||
; R600-NEXT: 31(4.344025e-44), 32(4.484155e-44)
|
|
||||||
; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Y, KC0[4].Y, PS,
|
|
||||||
; R600-NEXT: SETE_INT * T1.W, PV.W, 0.0,
|
|
||||||
; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[3].Y,
|
|
||||||
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
||||||
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
||||||
entry:
|
entry:
|
||||||
File diff suppressed because it is too large

@@ -126,7 +126,7 @@ define i16 @rotr_i16(i16 %x, i16 %z) {
 ; CHECK-NEXT: clrlwi 5, 5, 28
 ; CHECK-NEXT: srw 4, 6, 4
 ; CHECK-NEXT: slw 3, 3, 5
-; CHECK-NEXT: or 3, 3, 4
+; CHECK-NEXT: or 3, 4, 3
 ; CHECK-NEXT: blr
 %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
 ret i16 %f
@@ -47,21 +47,20 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK-LABEL: fshl_i37:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lis 6, -8857
-; CHECK-NEXT: clrldi 5, 5, 27
+; CHECK-NEXT: sldi 4, 4, 27
 ; CHECK-NEXT: ori 6, 6, 51366
-; CHECK-NEXT: clrldi 4, 4, 27
 ; CHECK-NEXT: sldi 6, 6, 32
 ; CHECK-NEXT: oris 6, 6, 3542
 ; CHECK-NEXT: ori 6, 6, 31883
 ; CHECK-NEXT: mulhdu 6, 5, 6
 ; CHECK-NEXT: rldicl 6, 6, 59, 5
 ; CHECK-NEXT: mulli 6, 6, 37
-; CHECK-NEXT: sub. 5, 5, 6
+; CHECK-NEXT: sub 5, 5, 6
-; CHECK-NEXT: subfic 6, 5, 37
+; CHECK-NEXT: clrlwi 5, 5, 26
-; CHECK-NEXT: sld 5, 3, 5
+; CHECK-NEXT: subfic 6, 5, 64
+; CHECK-NEXT: sld 3, 3, 5
 ; CHECK-NEXT: srd 4, 4, 6
-; CHECK-NEXT: or 4, 5, 4
+; CHECK-NEXT: or 3, 3, 4
-; CHECK-NEXT: iseleq 3, 3, 4
 ; CHECK-NEXT: blr
 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
 ret i37 %f
@@ -165,7 +164,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK-LABEL: fshr_i37:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lis 6, -8857
-; CHECK-NEXT: clrldi 5, 5, 27
+; CHECK-NEXT: sldi 4, 4, 27
 ; CHECK-NEXT: ori 6, 6, 51366
 ; CHECK-NEXT: sldi 6, 6, 32
 ; CHECK-NEXT: oris 6, 6, 3542
@@ -173,13 +172,13 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK-NEXT: mulhdu 6, 5, 6
 ; CHECK-NEXT: rldicl 6, 6, 59, 5
 ; CHECK-NEXT: mulli 6, 6, 37
-; CHECK-NEXT: sub. 5, 5, 6
+; CHECK-NEXT: sub 5, 5, 6
-; CHECK-NEXT: clrldi 6, 4, 27
+; CHECK-NEXT: addi 5, 5, 27
-; CHECK-NEXT: subfic 7, 5, 37
+; CHECK-NEXT: clrlwi 5, 5, 26
-; CHECK-NEXT: srd 5, 6, 5
+; CHECK-NEXT: subfic 6, 5, 64
-; CHECK-NEXT: sld 3, 3, 7
+; CHECK-NEXT: srd 4, 4, 5
-; CHECK-NEXT: or 3, 3, 5
+; CHECK-NEXT: sld 3, 3, 6
-; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: or 3, 3, 4
 ; CHECK-NEXT: blr
 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
 ret i37 %f
@@ -411,7 +411,7 @@ define i32 @ror_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-NEXT: srl a2, a0, a1
 ; RV32I-NEXT: neg a1, a1
 ; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: or a0, a2, a0
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: ror_i32:
@@ -469,21 +469,21 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT: srli a0, a0, 1
 ; RV32I-NEXT: srl a0, a0, a4
 ; RV32I-NEXT: or a4, a3, a0
-; RV32I-NEXT: or a0, t0, a7
+; RV32I-NEXT: or a0, a7, t0
 ; RV32I-NEXT: bgez t1, .LBB9_9
 ; RV32I-NEXT: .LBB9_6:
 ; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: ret
 ; RV32I-NEXT: .LBB9_7:
 ; RV32I-NEXT: sll t0, a0, a4
 ; RV32I-NEXT: bltz a3, .LBB9_5
 ; RV32I-NEXT: .LBB9_8:
 ; RV32I-NEXT: sll a4, a0, a3
-; RV32I-NEXT: or a0, t0, a7
+; RV32I-NEXT: or a0, a7, t0
 ; RV32I-NEXT: bltz t1, .LBB9_6
 ; RV32I-NEXT: .LBB9_9:
-; RV32I-NEXT: or a1, a4, zero
+; RV32I-NEXT: or a1, zero, a4
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: ror_i64:
@@ -515,21 +515,21 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV32IB-NEXT: srli a0, a0, 1
 ; RV32IB-NEXT: srl a0, a0, a4
 ; RV32IB-NEXT: or a4, a3, a0
-; RV32IB-NEXT: or a0, t0, a7
+; RV32IB-NEXT: or a0, a7, t0
 ; RV32IB-NEXT: bgez t1, .LBB9_9
 ; RV32IB-NEXT: .LBB9_6:
 ; RV32IB-NEXT: srl a1, a1, a2
-; RV32IB-NEXT: or a1, a4, a1
+; RV32IB-NEXT: or a1, a1, a4
 ; RV32IB-NEXT: ret
 ; RV32IB-NEXT: .LBB9_7:
 ; RV32IB-NEXT: sll t0, a0, a4
 ; RV32IB-NEXT: bltz a3, .LBB9_5
 ; RV32IB-NEXT: .LBB9_8:
 ; RV32IB-NEXT: sll a4, a0, a3
-; RV32IB-NEXT: or a0, t0, a7
+; RV32IB-NEXT: or a0, a7, t0
 ; RV32IB-NEXT: bltz t1, .LBB9_6
 ; RV32IB-NEXT: .LBB9_9:
-; RV32IB-NEXT: or a1, a4, zero
+; RV32IB-NEXT: or a1, zero, a4
 ; RV32IB-NEXT: ret
 ;
 ; RV32IBB-LABEL: ror_i64:
@@ -561,21 +561,21 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV32IBB-NEXT: srli a0, a0, 1
 ; RV32IBB-NEXT: srl a0, a0, a4
 ; RV32IBB-NEXT: or a4, a3, a0
-; RV32IBB-NEXT: or a0, t0, a7
+; RV32IBB-NEXT: or a0, a7, t0
 ; RV32IBB-NEXT: bgez t1, .LBB9_9
 ; RV32IBB-NEXT: .LBB9_6:
 ; RV32IBB-NEXT: srl a1, a1, a2
-; RV32IBB-NEXT: or a1, a4, a1
+; RV32IBB-NEXT: or a1, a1, a4
 ; RV32IBB-NEXT: ret
 ; RV32IBB-NEXT: .LBB9_7:
 ; RV32IBB-NEXT: sll t0, a0, a4
 ; RV32IBB-NEXT: bltz a3, .LBB9_5
 ; RV32IBB-NEXT: .LBB9_8:
 ; RV32IBB-NEXT: sll a4, a0, a3
-; RV32IBB-NEXT: or a0, t0, a7
+; RV32IBB-NEXT: or a0, a7, t0
 ; RV32IBB-NEXT: bltz t1, .LBB9_6
 ; RV32IBB-NEXT: .LBB9_9:
-; RV32IBB-NEXT: or a1, a4, zero
+; RV32IBB-NEXT: or a1, zero, a4
 ; RV32IBB-NEXT: ret
 ;
 ; RV32IBP-LABEL: ror_i64:
@@ -607,21 +607,21 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV32IBP-NEXT: srli a0, a0, 1
 ; RV32IBP-NEXT: srl a0, a0, a4
 ; RV32IBP-NEXT: or a4, a3, a0
-; RV32IBP-NEXT: or a0, t0, a7
+; RV32IBP-NEXT: or a0, a7, t0
 ; RV32IBP-NEXT: bgez t1, .LBB9_9
 ; RV32IBP-NEXT: .LBB9_6:
 ; RV32IBP-NEXT: srl a1, a1, a2
-; RV32IBP-NEXT: or a1, a4, a1
+; RV32IBP-NEXT: or a1, a1, a4
 ; RV32IBP-NEXT: ret
 ; RV32IBP-NEXT: .LBB9_7:
 ; RV32IBP-NEXT: sll t0, a0, a4
 ; RV32IBP-NEXT: bltz a3, .LBB9_5
 ; RV32IBP-NEXT: .LBB9_8:
 ; RV32IBP-NEXT: sll a4, a0, a3
-; RV32IBP-NEXT: or a0, t0, a7
+; RV32IBP-NEXT: or a0, a7, t0
 ; RV32IBP-NEXT: bltz t1, .LBB9_6
 ; RV32IBP-NEXT: .LBB9_9:
-; RV32IBP-NEXT: or a1, a4, zero
+; RV32IBP-NEXT: or a1, zero, a4
 ; RV32IBP-NEXT: ret
 %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
 ret i64 %or
@@ -122,15 +122,11 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
 define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) nounwind {
 ; RV32I-LABEL: fshl_i32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a3, a2, 31
-; RV32I-NEXT: beqz a3, .LBB4_2
-; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: addi a2, zero, 32
+; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: sub a2, a2, a3
+; RV32I-NEXT: srli a1, a1, 1
 ; RV32I-NEXT: srl a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: .LBB4_2:
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: fshl_i32:
@@ -157,158 +153,149 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
 define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-LABEL: fshl_i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi t1, a4, 63
+; RV32I-NEXT: andi a5, a4, 63
-; RV32I-NEXT: addi a6, t1, -32
+; RV32I-NEXT: addi t1, a5, -32
-; RV32I-NEXT: addi a7, zero, 31
+; RV32I-NEXT: addi a6, zero, 31
-; RV32I-NEXT: bltz a6, .LBB5_2
+; RV32I-NEXT: bltz t1, .LBB5_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sll t0, a0, a6
+; RV32I-NEXT: sll a7, a0, t1
 ; RV32I-NEXT: j .LBB5_3
 ; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: sll t0, a1, a4
+; RV32I-NEXT: sll a7, a1, a4
-; RV32I-NEXT: sub t2, a7, t1
+; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: srl a5, a5, t2
+; RV32I-NEXT: srl a1, a1, a5
-; RV32I-NEXT: or t0, t0, a5
+; RV32I-NEXT: or a7, a7, a1
 ; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: addi a5, zero, 32
+; RV32I-NEXT: not a1, a4
-; RV32I-NEXT: sub t4, a5, t1
+; RV32I-NEXT: andi t3, a1, 63
-; RV32I-NEXT: addi a5, zero, 64
+; RV32I-NEXT: addi a5, t3, -32
-; RV32I-NEXT: sub t2, a5, t1
+; RV32I-NEXT: srli t2, a3, 1
-; RV32I-NEXT: bltz t4, .LBB5_5
+; RV32I-NEXT: bltz a5, .LBB5_7
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv t3, zero
+; RV32I-NEXT: mv t0, zero
-; RV32I-NEXT: bnez t1, .LBB5_6
+; RV32I-NEXT: bgez a5, .LBB5_8
-; RV32I-NEXT: j .LBB5_7
 ; RV32I-NEXT: .LBB5_5:
-; RV32I-NEXT: srl t3, a3, t2
+; RV32I-NEXT: slli a3, a3, 31
-; RV32I-NEXT: beqz t1, .LBB5_7
+; RV32I-NEXT: srli a2, a2, 1
-; RV32I-NEXT: .LBB5_6:
-; RV32I-NEXT: or a1, t0, t3
-; RV32I-NEXT: .LBB5_7:
-; RV32I-NEXT: bltz t4, .LBB5_10
-; RV32I-NEXT: # %bb.8:
-; RV32I-NEXT: srl a2, a3, t4
-; RV32I-NEXT: bgez a6, .LBB5_11
-; RV32I-NEXT: .LBB5_9:
-; RV32I-NEXT: sll a3, a0, a4
-; RV32I-NEXT: bnez t1, .LBB5_12
-; RV32I-NEXT: j .LBB5_13
-; RV32I-NEXT: .LBB5_10:
-; RV32I-NEXT: srl a2, a2, t2
-; RV32I-NEXT: sub a5, a7, t2
-; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: sll a3, a3, a5
 ; RV32I-NEXT: or a2, a2, a3
-; RV32I-NEXT: bltz a6, .LBB5_9
+; RV32I-NEXT: srl a1, a2, a1
-; RV32I-NEXT: .LBB5_11:
+; RV32I-NEXT: sub a2, a6, t3
-; RV32I-NEXT: mv a3, zero
+; RV32I-NEXT: slli a3, t2, 1
-; RV32I-NEXT: beqz t1, .LBB5_13
+; RV32I-NEXT: sll a2, a3, a2
-; RV32I-NEXT: .LBB5_12:
+; RV32I-NEXT: or a2, a1, a2
-; RV32I-NEXT: or a0, a3, a2
+; RV32I-NEXT: or a1, a7, t0
-; RV32I-NEXT: .LBB5_13:
+; RV32I-NEXT: bgez t1, .LBB5_9
+; RV32I-NEXT: .LBB5_6:
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB5_7:
+; RV32I-NEXT: srl t0, t2, a1
+; RV32I-NEXT: bltz a5, .LBB5_5
+; RV32I-NEXT: .LBB5_8:
+; RV32I-NEXT: srl a2, t2, a5
+; RV32I-NEXT: or a1, a7, t0
+; RV32I-NEXT: bltz t1, .LBB5_6
+; RV32I-NEXT: .LBB5_9:
+; RV32I-NEXT: or a0, zero, a2
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: fshl_i64:
 ; RV32IB: # %bb.0:
-; RV32IB-NEXT: andi t1, a4, 63
+; RV32IB-NEXT: andi a5, a4, 63
-; RV32IB-NEXT: addi a6, t1, -32
+; RV32IB-NEXT: addi t2, a5, -32
-; RV32IB-NEXT: addi a7, zero, 31
+; RV32IB-NEXT: addi a6, zero, 31
-; RV32IB-NEXT: bltz a6, .LBB5_2
+; RV32IB-NEXT: bltz t2, .LBB5_2
 ; RV32IB-NEXT: # %bb.1:
-; RV32IB-NEXT: sll t0, a0, a6
+; RV32IB-NEXT: sll a7, a0, t2
 ; RV32IB-NEXT: j .LBB5_3
 ; RV32IB-NEXT: .LBB5_2:
-; RV32IB-NEXT: sll t0, a1, a4
+; RV32IB-NEXT: sll a7, a1, a4
-; RV32IB-NEXT: sub t2, a7, t1
+; RV32IB-NEXT: sub a5, a6, a5
-; RV32IB-NEXT: srli a5, a0, 1
+; RV32IB-NEXT: srli a1, a0, 1
-; RV32IB-NEXT: srl a5, a5, t2
+; RV32IB-NEXT: srl a1, a1, a5
-; RV32IB-NEXT: or t0, t0, a5
+; RV32IB-NEXT: or a7, a7, a1
 ; RV32IB-NEXT: .LBB5_3:
-; RV32IB-NEXT: addi a5, zero, 32
+; RV32IB-NEXT: not t1, a4
-; RV32IB-NEXT: sub t4, a5, t1
+; RV32IB-NEXT: addi a1, zero, 63
-; RV32IB-NEXT: addi a5, zero, 64
+; RV32IB-NEXT: andn a5, a1, a4
-; RV32IB-NEXT: sub t2, a5, t1
+; RV32IB-NEXT: addi a1, a5, -32
-; RV32IB-NEXT: bltz t4, .LBB5_7
+; RV32IB-NEXT: srli t3, a3, 1
+; RV32IB-NEXT: bltz a1, .LBB5_7
 ; RV32IB-NEXT: # %bb.4:
-; RV32IB-NEXT: mv t3, zero
+; RV32IB-NEXT: mv t0, zero
-; RV32IB-NEXT: or t0, t0, t3
+; RV32IB-NEXT: bgez a1, .LBB5_8
-; RV32IB-NEXT: bgez t4, .LBB5_8
 ; RV32IB-NEXT: .LBB5_5:
-; RV32IB-NEXT: srl a2, a2, t2
+; RV32IB-NEXT: fsl a1, a3, a6, a2
-; RV32IB-NEXT: sub a5, a7, t2
+; RV32IB-NEXT: srl a1, a1, t1
-; RV32IB-NEXT: slli a3, a3, 1
+; RV32IB-NEXT: sub a2, a6, a5
-; RV32IB-NEXT: sll a3, a3, a5
+; RV32IB-NEXT: slli a3, t3, 1
-; RV32IB-NEXT: or a2, a2, a3
+; RV32IB-NEXT: sll a2, a3, a2
-; RV32IB-NEXT: cmov a1, t1, t0, a1
+; RV32IB-NEXT: or a2, a1, a2
-; RV32IB-NEXT: bgez a6, .LBB5_9
+; RV32IB-NEXT: or a1, a7, t0
+; RV32IB-NEXT: bgez t2, .LBB5_9
 ; RV32IB-NEXT: .LBB5_6:
-; RV32IB-NEXT: sll a3, a0, a4
+; RV32IB-NEXT: sll a0, a0, a4
-; RV32IB-NEXT: j .LBB5_10
+; RV32IB-NEXT: or a0, a0, a2
+; RV32IB-NEXT: ret
 ; RV32IB-NEXT: .LBB5_7:
-; RV32IB-NEXT: srl t3, a3, t2
+; RV32IB-NEXT: srl t0, t3, t1
-; RV32IB-NEXT: or t0, t0, t3
+; RV32IB-NEXT: bltz a1, .LBB5_5
-; RV32IB-NEXT: bltz t4, .LBB5_5
 ; RV32IB-NEXT: .LBB5_8:
-; RV32IB-NEXT: srl a2, a3, t4
+; RV32IB-NEXT: srl a2, t3, a1
-; RV32IB-NEXT: cmov a1, t1, t0, a1
+; RV32IB-NEXT: or a1, a7, t0
-; RV32IB-NEXT: bltz a6, .LBB5_6
+; RV32IB-NEXT: bltz t2, .LBB5_6
 ; RV32IB-NEXT: .LBB5_9:
-; RV32IB-NEXT: mv a3, zero
+; RV32IB-NEXT: or a0, zero, a2
-; RV32IB-NEXT: .LBB5_10:
-; RV32IB-NEXT: or a2, a3, a2
-; RV32IB-NEXT: cmov a0, t1, a2, a0
 ; RV32IB-NEXT: ret
 ;
 ; RV32IBT-LABEL: fshl_i64:
 ; RV32IBT: # %bb.0:
-; RV32IBT-NEXT: andi t1, a4, 63
+; RV32IBT-NEXT: andi a5, a4, 63
-; RV32IBT-NEXT: addi a6, t1, -32
+; RV32IBT-NEXT: addi t1, a5, -32
-; RV32IBT-NEXT: addi a7, zero, 31
+; RV32IBT-NEXT: addi a6, zero, 31
-; RV32IBT-NEXT: bltz a6, .LBB5_2
+; RV32IBT-NEXT: bltz t1, .LBB5_2
 ; RV32IBT-NEXT: # %bb.1:
-; RV32IBT-NEXT: sll t0, a0, a6
+; RV32IBT-NEXT: sll a7, a0, t1
 ; RV32IBT-NEXT: j .LBB5_3
 ; RV32IBT-NEXT: .LBB5_2:
-; RV32IBT-NEXT: sll t0, a1, a4
+; RV32IBT-NEXT: sll a7, a1, a4
-; RV32IBT-NEXT: sub t2, a7, t1
+; RV32IBT-NEXT: sub a5, a6, a5
-; RV32IBT-NEXT: srli a5, a0, 1
+; RV32IBT-NEXT: srli a1, a0, 1
-; RV32IBT-NEXT: srl a5, a5, t2
+; RV32IBT-NEXT: srl a1, a1, a5
-; RV32IBT-NEXT: or t0, t0, a5
+; RV32IBT-NEXT: or a7, a7, a1
 ; RV32IBT-NEXT: .LBB5_3:
-; RV32IBT-NEXT: addi a5, zero, 32
+; RV32IBT-NEXT: not a1, a4
-; RV32IBT-NEXT: sub t4, a5, t1
+; RV32IBT-NEXT: andi t3, a1, 63
-; RV32IBT-NEXT: addi a5, zero, 64
+; RV32IBT-NEXT: addi a5, t3, -32
-; RV32IBT-NEXT: sub t2, a5, t1
+; RV32IBT-NEXT: srli t2, a3, 1
-; RV32IBT-NEXT: bltz t4, .LBB5_7
+; RV32IBT-NEXT: bltz a5, .LBB5_7
 ; RV32IBT-NEXT: # %bb.4:
-; RV32IBT-NEXT: mv t3, zero
+; RV32IBT-NEXT: mv t0, zero
-; RV32IBT-NEXT: or t0, t0, t3
+; RV32IBT-NEXT: bgez a5, .LBB5_8
-; RV32IBT-NEXT: bgez t4, .LBB5_8
 ; RV32IBT-NEXT: .LBB5_5:
-; RV32IBT-NEXT: srl a2, a2, t2
+; RV32IBT-NEXT: fsl a2, a3, a6, a2
-; RV32IBT-NEXT: sub a5, a7, t2
+; RV32IBT-NEXT: srl a1, a2, a1
-; RV32IBT-NEXT: slli a3, a3, 1
+; RV32IBT-NEXT: sub a2, a6, t3
-; RV32IBT-NEXT: sll a3, a3, a5
+; RV32IBT-NEXT: slli a3, t2, 1
-; RV32IBT-NEXT: or a2, a2, a3
+; RV32IBT-NEXT: sll a2, a3, a2
-; RV32IBT-NEXT: cmov a1, t1, t0, a1
+; RV32IBT-NEXT: or a2, a1, a2
-; RV32IBT-NEXT: bgez a6, .LBB5_9
+; RV32IBT-NEXT: or a1, a7, t0
+; RV32IBT-NEXT: bgez t1, .LBB5_9
 ; RV32IBT-NEXT: .LBB5_6:
-; RV32IBT-NEXT: sll a3, a0, a4
+; RV32IBT-NEXT: sll a0, a0, a4
-; RV32IBT-NEXT: j .LBB5_10
+; RV32IBT-NEXT: or a0, a0, a2
+; RV32IBT-NEXT: ret
 ; RV32IBT-NEXT: .LBB5_7:
-; RV32IBT-NEXT: srl t3, a3, t2
+; RV32IBT-NEXT: srl t0, t2, a1
-; RV32IBT-NEXT: or t0, t0, t3
+; RV32IBT-NEXT: bltz a5, .LBB5_5
-; RV32IBT-NEXT: bltz t4, .LBB5_5
 ; RV32IBT-NEXT: .LBB5_8:
-; RV32IBT-NEXT: srl a2, a3, t4
+; RV32IBT-NEXT: srl a2, t2, a5
-; RV32IBT-NEXT: cmov a1, t1, t0, a1
+; RV32IBT-NEXT: or a1, a7, t0
-; RV32IBT-NEXT: bltz a6, .LBB5_6
+; RV32IBT-NEXT: bltz t1, .LBB5_6
 ; RV32IBT-NEXT: .LBB5_9:
-; RV32IBT-NEXT: mv a3, zero
+; RV32IBT-NEXT: or a0, zero, a2
-; RV32IBT-NEXT: .LBB5_10:
-; RV32IBT-NEXT: or a2, a3, a2
-; RV32IBT-NEXT: cmov a0, t1, a2, a0
 ; RV32IBT-NEXT: ret
 %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
 ret i64 %1
@@ -319,16 +306,11 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
 define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) nounwind {
 ; RV32I-LABEL: fshr_i32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: andi a3, a2, 31
-; RV32I-NEXT: beqz a3, .LBB6_2
-; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: addi a2, zero, 32
+; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: sub a2, a2, a3
+; RV32I-NEXT: slli a0, a0, 1
 ; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: .LBB6_2:
-; RV32I-NEXT: mv a0, a1
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: fshr_i32:
@@ -355,162 +337,157 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
 define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-LABEL: fshr_i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: mv t1, a3
-; RV32I-NEXT: mv a6, a2
 ; RV32I-NEXT: andi a5, a4, 63
-; RV32I-NEXT: addi t2, a5, -32
+; RV32I-NEXT: addi t1, a5, -32
-; RV32I-NEXT: addi a7, zero, 31
+; RV32I-NEXT: addi a6, zero, 31
-; RV32I-NEXT: bltz t2, .LBB7_2
+; RV32I-NEXT: bltz t1, .LBB7_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: srl t0, t1, t2
+; RV32I-NEXT: srl a7, a3, t1
 ; RV32I-NEXT: j .LBB7_3
 ; RV32I-NEXT: .LBB7_2:
-; RV32I-NEXT: srl t0, a6, a4
+; RV32I-NEXT: srl a7, a2, a4
-; RV32I-NEXT: sub a3, a7, a5
+; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: slli a2, t1, 1
+; RV32I-NEXT: slli a2, a3, 1
-; RV32I-NEXT: sll a2, a2, a3
+; RV32I-NEXT: sll a2, a2, a5
-; RV32I-NEXT: or t0, t0, a2
+; RV32I-NEXT: or a7, a7, a2
 ; RV32I-NEXT: .LBB7_3:
-; RV32I-NEXT: addi a2, zero, 32
+; RV32I-NEXT: not a2, a4
-; RV32I-NEXT: sub a3, a2, a5
+; RV32I-NEXT: andi t2, a2, 63
-; RV32I-NEXT: addi a2, zero, 64
+; RV32I-NEXT: addi a5, t2, -32
-; RV32I-NEXT: sub a2, a2, a5
+; RV32I-NEXT: slli t3, a0, 1
-; RV32I-NEXT: bltz a3, .LBB7_5
+; RV32I-NEXT: bltz a5, .LBB7_7
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv t3, zero
+; RV32I-NEXT: mv t0, zero
-; RV32I-NEXT: bnez a5, .LBB7_6
+; RV32I-NEXT: bgez a5, .LBB7_8
-; RV32I-NEXT: j .LBB7_7
 ; RV32I-NEXT: .LBB7_5:
-; RV32I-NEXT: sll t3, a0, a2
+; RV32I-NEXT: lui a5, 524288
-; RV32I-NEXT: beqz a5, .LBB7_7
+; RV32I-NEXT: addi a5, a5, -1
-; RV32I-NEXT: .LBB7_6:
+; RV32I-NEXT: and t3, a0, a5
-; RV32I-NEXT: or a6, t3, t0
+; RV32I-NEXT: sub a5, a6, t2
-; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: srl a5, t3, a5
-; RV32I-NEXT: bltz a3, .LBB7_10
+; RV32I-NEXT: srli a0, a0, 31
-; RV32I-NEXT: # %bb.8:
+; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: sll a0, a0, a3
-; RV32I-NEXT: bgez t2, .LBB7_11
-; RV32I-NEXT: .LBB7_9:
-; RV32I-NEXT: srl a1, t1, a4
-; RV32I-NEXT: bnez a5, .LBB7_12
-; RV32I-NEXT: j .LBB7_13
-; RV32I-NEXT: .LBB7_10:
-; RV32I-NEXT: sll a1, a1, a2
-; RV32I-NEXT: sub a2, a7, a2
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: bltz t2, .LBB7_9
+; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: .LBB7_11:
+; RV32I-NEXT: or a1, a0, a5
-; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: or a0, t0, a7
-; RV32I-NEXT: beqz a5, .LBB7_13
+; RV32I-NEXT: bgez t1, .LBB7_9
-; RV32I-NEXT: .LBB7_12:
+; RV32I-NEXT: .LBB7_6:
-; RV32I-NEXT: or t1, a0, a1
+; RV32I-NEXT: srl a2, a3, a4
-; RV32I-NEXT: .LBB7_13:
+; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: mv a0, a6
+; RV32I-NEXT: ret
-; RV32I-NEXT: mv a1, t1
+; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: sll t0, t3, a2
+; RV32I-NEXT: bltz a5, .LBB7_5
+; RV32I-NEXT: .LBB7_8:
+; RV32I-NEXT: sll a1, t3, a5
+; RV32I-NEXT: or a0, t0, a7
+; RV32I-NEXT: bltz t1, .LBB7_6
+; RV32I-NEXT: .LBB7_9:
+; RV32I-NEXT: or a1, a1, zero
 ; RV32I-NEXT: ret
 ;
 ; RV32IB-LABEL: fshr_i64:
 ; RV32IB: # %bb.0:
-; RV32IB-NEXT: andi t1, a4, 63
+; RV32IB-NEXT: andi a5, a4, 63
-; RV32IB-NEXT: addi a6, t1, -32
+; RV32IB-NEXT: addi t2, a5, -32
-; RV32IB-NEXT: addi a7, zero, 31
+; RV32IB-NEXT: addi a6, zero, 31
-; RV32IB-NEXT: bltz a6, .LBB7_2
+; RV32IB-NEXT: bltz t2, .LBB7_2
 ; RV32IB-NEXT: # %bb.1:
-; RV32IB-NEXT: srl t0, a3, a6
+; RV32IB-NEXT: srl a7, a3, t2
 ; RV32IB-NEXT: j .LBB7_3
 ; RV32IB-NEXT: .LBB7_2:
-; RV32IB-NEXT: srl t0, a2, a4
+; RV32IB-NEXT: srl a7, a2, a4
-; RV32IB-NEXT: sub t2, a7, t1
+; RV32IB-NEXT: sub a5, a6, a5
-; RV32IB-NEXT: slli a5, a3, 1
+; RV32IB-NEXT: slli a2, a3, 1
-; RV32IB-NEXT: sll a5, a5, t2
+; RV32IB-NEXT: sll a2, a2, a5
-; RV32IB-NEXT: or t0, t0, a5
+; RV32IB-NEXT: or a7, a7, a2
 ; RV32IB-NEXT: .LBB7_3:
-; RV32IB-NEXT: addi a5, zero, 32
+; RV32IB-NEXT: not t1, a4
-; RV32IB-NEXT: sub t4, a5, t1
+; RV32IB-NEXT: addi a2, zero, 63
-; RV32IB-NEXT: addi a5, zero, 64
+; RV32IB-NEXT: andn a2, a2, a4
-; RV32IB-NEXT: sub t2, a5, t1
+; RV32IB-NEXT: addi a5, a2, -32
-; RV32IB-NEXT: bltz t4, .LBB7_7
+; RV32IB-NEXT: slli t3, a0, 1
+; RV32IB-NEXT: bltz a5, .LBB7_7
 ; RV32IB-NEXT: # %bb.4:
-; RV32IB-NEXT: mv t3, zero
+; RV32IB-NEXT: mv t0, zero
-; RV32IB-NEXT: or t0, t3, t0
+; RV32IB-NEXT: bgez a5, .LBB7_8
-; RV32IB-NEXT: bgez t4, .LBB7_8
 ; RV32IB-NEXT: .LBB7_5:
-; RV32IB-NEXT: sll a1, a1, t2
+; RV32IB-NEXT: addi a5, zero, 1
-; RV32IB-NEXT: sub a5, a7, t2
+; RV32IB-NEXT: fsl a1, a1, a5, a0
-; RV32IB-NEXT: srli a0, a0, 1
+; RV32IB-NEXT: sll a1, a1, t1
-; RV32IB-NEXT: srl a0, a0, a5
+; RV32IB-NEXT: sub a2, a6, a2
+; RV32IB-NEXT: lui a5, 524288
+; RV32IB-NEXT: addi a5, a5, -1
+; RV32IB-NEXT: and a0, a0, a5
+; RV32IB-NEXT: srl a0, a0, a2
 ; RV32IB-NEXT: or a1, a1, a0
-; RV32IB-NEXT: cmov a0, t1, t0, a2
+; RV32IB-NEXT: or a0, t0, a7
-; RV32IB-NEXT: bgez a6, .LBB7_9
+; RV32IB-NEXT: bgez t2, .LBB7_9
 ; RV32IB-NEXT: .LBB7_6:
 ; RV32IB-NEXT: srl a2, a3, a4
-; RV32IB-NEXT: j .LBB7_10
-; RV32IB-NEXT: .LBB7_7:
-; RV32IB-NEXT: sll t3, a0, t2
-; RV32IB-NEXT: or t0, t3, t0
-; RV32IB-NEXT: bltz t4, .LBB7_5
-; RV32IB-NEXT: .LBB7_8:
-; RV32IB-NEXT: sll a1, a0, t4
-; RV32IB-NEXT: cmov a0, t1, t0, a2
-; RV32IB-NEXT: bltz a6, .LBB7_6
-; RV32IB-NEXT: .LBB7_9:
-; RV32IB-NEXT: mv a2, zero
-; RV32IB-NEXT: .LBB7_10:
 ; RV32IB-NEXT: or a1, a1, a2
-; RV32IB-NEXT: cmov a1, t1, a1, a3
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB7_7:
+; RV32IB-NEXT: sll t0, t3, t1
+; RV32IB-NEXT: bltz a5, .LBB7_5
+; RV32IB-NEXT: .LBB7_8:
+; RV32IB-NEXT: sll a1, t3, a5
+; RV32IB-NEXT: or a0, t0, a7
+; RV32IB-NEXT: bltz t2, .LBB7_6
+; RV32IB-NEXT: .LBB7_9:
+; RV32IB-NEXT: or a1, a1, zero
 ; RV32IB-NEXT: ret
 ;
 ; RV32IBT-LABEL: fshr_i64:
 ; RV32IBT: # %bb.0:
-; RV32IBT-NEXT: andi t1, a4, 63
+; RV32IBT-NEXT: not a7, a4
-; RV32IBT-NEXT: addi a6, t1, -32
+; RV32IBT-NEXT: andi t1, a7, 63
-; RV32IBT-NEXT: addi a7, zero, 31
+; RV32IBT-NEXT: addi t0, zero, 31
-; RV32IBT-NEXT: bltz a6, .LBB7_2
+; RV32IBT-NEXT: addi t2, t1, -32
+; RV32IBT-NEXT: slli a6, a0, 1
+; RV32IBT-NEXT: bltz t2, .LBB7_2
 ; RV32IBT-NEXT: # %bb.1:
-; RV32IBT-NEXT: srl t0, a3, a6
+; RV32IBT-NEXT: sll t1, a6, t2
 ; RV32IBT-NEXT: j .LBB7_3
 ; RV32IBT-NEXT: .LBB7_2:
-; RV32IBT-NEXT: srl t0, a2, a4
+; RV32IBT-NEXT: addi a5, zero, 1
-; RV32IBT-NEXT: sub t2, a7, t1
+; RV32IBT-NEXT: fsl a1, a1, a5, a0
-; RV32IBT-NEXT: slli a5, a3, 1
+; RV32IBT-NEXT: sll a1, a1, a7
-; RV32IBT-NEXT: sll a5, a5, t2
+; RV32IBT-NEXT: lui a5, 524288
-; RV32IBT-NEXT: or t0, t0, a5
+; RV32IBT-NEXT: addi a5, a5, -1
-; RV32IBT-NEXT: .LBB7_3:
+; RV32IBT-NEXT: and a0, a0, a5
-; RV32IBT-NEXT: addi a5, zero, 32
+; RV32IBT-NEXT: sub a5, t0, t1
-; RV32IBT-NEXT: sub t4, a5, t1
-; RV32IBT-NEXT: addi a5, zero, 64
-; RV32IBT-NEXT: sub t2, a5, t1
-; RV32IBT-NEXT: bltz t4, .LBB7_7
-; RV32IBT-NEXT: # %bb.4:
-; RV32IBT-NEXT: mv t3, zero
-; RV32IBT-NEXT: or t0, t3, t0
-; RV32IBT-NEXT: bgez t4, .LBB7_8
-; RV32IBT-NEXT: .LBB7_5:
-; RV32IBT-NEXT: sll a1, a1, t2
-; RV32IBT-NEXT: sub a5, a7, t2
-; RV32IBT-NEXT: srli a0, a0, 1
 ; RV32IBT-NEXT: srl a0, a0, a5
-; RV32IBT-NEXT: or a1, a1, a0
+; RV32IBT-NEXT: or t1, a1, a0
-; RV32IBT-NEXT: cmov a0, t1, t0, a2
+; RV32IBT-NEXT: .LBB7_3:
-; RV32IBT-NEXT: bgez a6, .LBB7_9
+; RV32IBT-NEXT: andi a0, a4, 63
+; RV32IBT-NEXT: addi a5, a0, -32
+; RV32IBT-NEXT: bltz a5, .LBB7_7
+; RV32IBT-NEXT: # %bb.4:
+; RV32IBT-NEXT: mv a1, zero
+; RV32IBT-NEXT: bgez a5, .LBB7_8
+; RV32IBT-NEXT: .LBB7_5:
+; RV32IBT-NEXT: srl a2, a2, a4
+; RV32IBT-NEXT: sub a0, t0, a0
+; RV32IBT-NEXT: slli a3, a3, 1
+; RV32IBT-NEXT: sll a0, a3, a0
+; RV32IBT-NEXT: or a2, a2, a0
+; RV32IBT-NEXT: or a1, t1, a1
+; RV32IBT-NEXT: bgez t2, .LBB7_9
 ; RV32IBT-NEXT: .LBB7_6:
-; RV32IBT-NEXT: srl a2, a3, a4
+; RV32IBT-NEXT: sll a0, a6, a7
-; RV32IBT-NEXT: j .LBB7_10
+; RV32IBT-NEXT: or a0, a0, a2
+; RV32IBT-NEXT: ret
 ; RV32IBT-NEXT: .LBB7_7:
-; RV32IBT-NEXT: sll t3, a0, t2
+; RV32IBT-NEXT: srl a1, a3, a4
-; RV32IBT-NEXT: or t0, t3, t0
+; RV32IBT-NEXT: bltz a5, .LBB7_5
-; RV32IBT-NEXT: bltz t4, .LBB7_5
 ; RV32IBT-NEXT: .LBB7_8:
-; RV32IBT-NEXT: sll a1, a0, t4
+; RV32IBT-NEXT: srl a2, a3, a5
-; RV32IBT-NEXT: cmov a0, t1, t0, a2
+; RV32IBT-NEXT: or a1, t1, a1
-; RV32IBT-NEXT: bltz a6, .LBB7_6
+; RV32IBT-NEXT: bltz t2, .LBB7_6
 ; RV32IBT-NEXT: .LBB7_9:
-; RV32IBT-NEXT: mv a2, zero
+; RV32IBT-NEXT: or a0, zero, a2
-; RV32IBT-NEXT: .LBB7_10:
-; RV32IBT-NEXT: or a1, a1, a2
-; RV32IBT-NEXT: cmov a1, t1, a1, a3
 ; RV32IBT-NEXT: ret
 %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
 ret i64 %1
@@ -230,7 +230,7 @@ define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT: srlw a2, a0, a1
 ; RV64I-NEXT: neg a1, a1
 ; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a2, a0
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: ror_i32:
@@ -259,7 +259,7 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT: srl a2, a0, a1
 ; RV64I-NEXT: neg a1, a1
 ; RV64I-NEXT: sll a0, a0, a1
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: or a0, a2, a0
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: ror_i64:
@@ -291,7 +291,7 @@ define signext i32 @rori_i32(i32 signext %a) nounwind {
 ;
 ; RV64IB-LABEL: rori_i32:
 ; RV64IB: # %bb.0:
-; RV64IB-NEXT: fsriw a0, a0, a0, 1
+; RV64IB-NEXT: roriw a0, a0, 1
 ; RV64IB-NEXT: ret
 ;
 ; RV64IBB-LABEL: rori_i32:
@@ -109,15 +109,14 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
 define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
 ; RV64I-LABEL: fshl_i32:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a2, 31
+; RV64I-NEXT: andi a2, a2, 31
-; RV64I-NEXT: beqz a3, .LBB4_2
+; RV64I-NEXT: sll a0, a0, a2
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: not a2, a2
-; RV64I-NEXT: addi a4, zero, 32
+; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: sub a2, a4, a2
+; RV64I-NEXT: srli a1, a1, 1
-; RV64I-NEXT: srlw a1, a1, a2
+; RV64I-NEXT: srl a1, a1, a2
-; RV64I-NEXT: sllw a0, a0, a3
 ; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: .LBB4_2:
+; RV64I-NEXT: sext.w a0, a0
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: fshl_i32:
@@ -138,15 +137,11 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
 define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV64I-LABEL: fshl_i64:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a2, 63
-; RV64I-NEXT: beqz a3, .LBB5_2
-; RV64I-NEXT: # %bb.1:
 ; RV64I-NEXT: sll a0, a0, a2
-; RV64I-NEXT: addi a2, zero, 64
+; RV64I-NEXT: not a2, a2
-; RV64I-NEXT: sub a2, a2, a3
+; RV64I-NEXT: srli a1, a1, 1
 ; RV64I-NEXT: srl a1, a1, a2
 ; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: .LBB5_2:
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: fshl_i64:
@@ -167,16 +162,15 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
 define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
 ; RV64I-LABEL: fshr_i32:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a2, 31
+; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: beqz a3, .LBB6_2
+; RV64I-NEXT: andi a2, a2, 31
-; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: ori a3, a2, 32
-; RV64I-NEXT: srlw a1, a1, a3
+; RV64I-NEXT: srl a1, a1, a3
-; RV64I-NEXT: addi a3, zero, 32
+; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: sub a2, a3, a2
+; RV64I-NEXT: xori a2, a2, 31
-; RV64I-NEXT: sllw a0, a0, a2
+; RV64I-NEXT: sll a0, a0, a2
-; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: .LBB6_2:
+; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: mv a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: fshr_i32:
@@ -197,16 +191,11 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
 define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV64I-LABEL: fshr_i64:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: andi a3, a2, 63
-; RV64I-NEXT: beqz a3, .LBB7_2
-; RV64I-NEXT: # %bb.1:
 ; RV64I-NEXT: srl a1, a1, a2
-; RV64I-NEXT: addi a2, zero, 64
+; RV64I-NEXT: not a2, a2
-; RV64I-NEXT: sub a2, a2, a3
+; RV64I-NEXT: slli a0, a0, 1
 ; RV64I-NEXT: sll a0, a0, a2
-; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: .LBB7_2:
-; RV64I-NEXT: mv a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV64IB-LABEL: fshr_i64:
@@ -178,58 +178,41 @@ define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %ebp
 ; X86-FAST-NEXT: pushl %ebx
 ; X86-FAST-NEXT: pushl %edi
 ; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: pushl %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: andl $63, %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl %eax, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl %ebx, %ecx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: shll %cl, %edi
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-FAST-NEXT: shldl %cl, %eax, %ebp
+; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: testb $32, %bl
+; X86-FAST-NEXT: notb %cl
+; X86-FAST-NEXT: shrdl $1, %edi, %esi
+; X86-FAST-NEXT: shrl %edi
+; X86-FAST-NEXT: shrdl %cl, %edi, %esi
+; X86-FAST-NEXT: shrl %cl, %edi
+; X86-FAST-NEXT: testb $32, %cl
 ; X86-FAST-NEXT: je .LBB5_2
 ; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: movl %edi, %esi
 ; X86-FAST-NEXT: xorl %edi, %edi
 ; X86-FAST-NEXT: .LBB5_2:
-; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: movl %ebx, %eax
-; X86-FAST-NEXT: subb %bl, %cl
+; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl %edx, %esi
+; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: shrl %cl, %esi
+; X86-FAST-NEXT: shldl %cl, %ebx, %edx
-; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
+; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: je .LBB5_4
-; X86-FAST-NEXT: jne .LBB5_3
+; X86-FAST-NEXT: # %bb.3:
-; X86-FAST-NEXT: # %bb.4:
+; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-FAST-NEXT: .LBB5_4:
-; X86-FAST-NEXT: testl %ebx, %ebx
+; X86-FAST-NEXT: orl %edi, %edx
-; X86-FAST-NEXT: jne .LBB5_6
+; X86-FAST-NEXT: orl %esi, %eax
-; X86-FAST-NEXT: jmp .LBB5_7
-; X86-FAST-NEXT: .LBB5_3:
-; X86-FAST-NEXT: movl %esi, %ecx
-; X86-FAST-NEXT: xorl %esi, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB5_7
-; X86-FAST-NEXT: .LBB5_6:
-; X86-FAST-NEXT: orl %esi, %ebp
-; X86-FAST-NEXT: orl %ecx, %edi
-; X86-FAST-NEXT: movl %edi, %eax
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: .LBB5_7:
-; X86-FAST-NEXT: addl $4, %esp
 ; X86-FAST-NEXT: popl %esi
 ; X86-FAST-NEXT: popl %edi
 ; X86-FAST-NEXT: popl %ebx
-; X86-FAST-NEXT: popl %ebp
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i64:
@@ -238,59 +221,55 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %ebx
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: andl $63, %ebx
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT: movb $64, %ch
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: subb %bl, %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: addl %eax, %eax
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shll %cl, %edi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: shrl %eax
-; X86-SLOW-NEXT: shrl %ebp
+; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: notb %cl
+; X86-SLOW-NEXT: shll $31, %edi
-; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: orl %eax, %edi
-; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: movb %cl, %ch
-; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: notb %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
+; X86-SLOW-NEXT: movb %al, %cl
+; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: shll %cl, %ebx
+; X86-SLOW-NEXT: movl %edx, %eax
+; X86-SLOW-NEXT: shrl %eax
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp)
 ; X86-SLOW-NEXT: jne .LBB5_1
 ; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: orl %ebp, %edi
+; X86-SLOW-NEXT: orl %eax, %ebx
 ; X86-SLOW-NEXT: jmp .LBB5_3
 ; X86-SLOW-NEXT: .LBB5_1:
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %edx, %ebx
-; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: xorl %edx, %edx
 ; X86-SLOW-NEXT: .LBB5_3:
 ; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: shrl %cl, %ebp
 ; X86-SLOW-NEXT: testb $32, %ch
 ; X86-SLOW-NEXT: jne .LBB5_4
 ; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: orl %edx, %eax
+; X86-SLOW-NEXT: orl %edi, %ebp
-; X86-SLOW-NEXT: movl %eax, %ecx
 ; X86-SLOW-NEXT: jmp .LBB5_6
 ; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl %ebp, %ecx
+; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: xorl %ebp, %ebp
+; X86-SLOW-NEXT: xorl %esi, %esi
 ; X86-SLOW-NEXT: .LBB5_6:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: orl %esi, %ebx
-; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: je .LBB5_8
+; X86-SLOW-NEXT: movl %ebx, %edx
-; X86-SLOW-NEXT: # %bb.7:
-; X86-SLOW-NEXT: orl %ebp, %edi
-; X86-SLOW-NEXT: orl %ecx, %esi
-; X86-SLOW-NEXT: movl %edi, %edx
-; X86-SLOW-NEXT: movl %esi, %eax
-; X86-SLOW-NEXT: .LBB5_8:
 ; X86-SLOW-NEXT: popl %esi
 ; X86-SLOW-NEXT: popl %edi
 ; X86-SLOW-NEXT: popl %ebx
@@ -179,46 +179,37 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-NEXT: pushl %ebx
 ; X86-FAST-NEXT: pushl %edi
 ; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: pushl %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: andl $63, %ebx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: subb %bl, %cl
+; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-FAST-NEXT: movl %eax, %edi
+; X86-FAST-NEXT: movb %bl, %ch
-; X86-FAST-NEXT: shll %cl, %edi
+; X86-FAST-NEXT: notb %ch
-; X86-FAST-NEXT: shldl %cl, %eax, %esi
+; X86-FAST-NEXT: shldl $1, %eax, %edx
-; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: addl %eax, %eax
+; X86-FAST-NEXT: movb %ch, %cl
+; X86-FAST-NEXT: shldl %cl, %eax, %edx
+; X86-FAST-NEXT: movl %ebp, %edi
+; X86-FAST-NEXT: movb %bl, %cl
+; X86-FAST-NEXT: shrl %cl, %edi
+; X86-FAST-NEXT: shrdl %cl, %ebp, %esi
+; X86-FAST-NEXT: testb $32, %bl
 ; X86-FAST-NEXT: je .LBB5_2
 ; X86-FAST-NEXT: # %bb.1:
 ; X86-FAST-NEXT: movl %edi, %esi
 ; X86-FAST-NEXT: xorl %edi, %edi
 ; X86-FAST-NEXT: .LBB5_2:
-; X86-FAST-NEXT: movl %edx, %ebp
+; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl %ebx, %ecx
+; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: shrl %cl, %ebp
+; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl %cl, %edx, %eax
-; X86-FAST-NEXT: testb $32, %bl
 ; X86-FAST-NEXT: je .LBB5_4
 ; X86-FAST-NEXT: # %bb.3:
-; X86-FAST-NEXT: movl %ebp, %eax
+; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: xorl %ebp, %ebp
+; X86-FAST-NEXT: xorl %eax, %eax
 ; X86-FAST-NEXT: .LBB5_4:
-; X86-FAST-NEXT: testl %ebx, %ebx
+; X86-FAST-NEXT: orl %edi, %edx
-; X86-FAST-NEXT: je .LBB5_6
+; X86-FAST-NEXT: orl %esi, %eax
-; X86-FAST-NEXT: # %bb.5:
-; X86-FAST-NEXT: orl %ebp, %esi
-; X86-FAST-NEXT: orl %eax, %edi
-; X86-FAST-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, %edx
-; X86-FAST-NEXT: .LBB5_6:
-; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-FAST-NEXT: addl $4, %esp
 ; X86-FAST-NEXT: popl %esi
 ; X86-FAST-NEXT: popl %edi
 ; X86-FAST-NEXT: popl %ebx
@@ -231,62 +222,55 @@ define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %ebx
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $8, %esp
+; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %bl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: andl $63, %ebx
+; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: movb $64, %ch
+; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-SLOW-NEXT: subb %bl, %ch
+; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movb %bl, %cl
 ; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: notb %cl
+; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%eax,%eax), %ebp
+; X86-SLOW-NEXT: leal (%ecx,%edx,2), %edx
+; X86-SLOW-NEXT: movb %bl, %ch
+; X86-SLOW-NEXT: notb %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
+; X86-SLOW-NEXT: movb %ch, %cl
 ; X86-SLOW-NEXT: shll %cl, %ebp
 ; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: shrl %cl, %esi
 ; X86-SLOW-NEXT: testb $32, %bl
 ; X86-SLOW-NEXT: jne .LBB5_1
 ; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: orl (%esp), %ebp # 4-byte Folded Reload
 ; X86-SLOW-NEXT: jmp .LBB5_3
 ; X86-SLOW-NEXT: .LBB5_1:
-; X86-SLOW-NEXT: movl %eax, %ebp
+; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: xorl %eax, %eax
+; X86-SLOW-NEXT: xorl %esi, %esi
 ; X86-SLOW-NEXT: .LBB5_3:
+; X86-SLOW-NEXT: addl %eax, %eax
 ; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shll %cl, %edi
 ; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT: jne .LBB5_4
 ; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: orl %edi, %edx
-; X86-SLOW-NEXT: orl %esi, %ecx
 ; X86-SLOW-NEXT: jmp .LBB5_6
 ; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl %edi, %ecx
+; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: xorl %eax, %eax
 ; X86-SLOW-NEXT: .LBB5_6:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: orl %esi, %edx
-; X86-SLOW-NEXT: testl %ebx, %ebx
+; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: addl $4, %esp
-; X86-SLOW-NEXT: je .LBB5_8
-; X86-SLOW-NEXT: # %bb.7:
-; X86-SLOW-NEXT: orl %ebp, %edi
-; X86-SLOW-NEXT: orl (%esp), %ecx # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: .LBB5_8:
-; X86-SLOW-NEXT: addl $8, %esp
 ; X86-SLOW-NEXT: popl %esi
 ; X86-SLOW-NEXT: popl %edi
 ; X86-SLOW-NEXT: popl %ebx
|
|
|
@@ -45,46 +45,40 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ; X32-SSE2-NEXT: pushl %ebx
 ; X32-SSE2-NEXT: pushl %edi
 ; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: pushl %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: andl $31, %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: andl $31, %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: shldl $27, %ebx, %edi
+; X32-SSE2-NEXT: shll $27, %ebx
+; X32-SSE2-NEXT: shrdl $1, %edi, %ebx
+; X32-SSE2-NEXT: shrl %edi
 ; X32-SSE2-NEXT: pushl $0
 ; X32-SSE2-NEXT: pushl $37
-; X32-SSE2-NEXT: pushl %eax
+; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
 ; X32-SSE2-NEXT: calll __umoddi3
 ; X32-SSE2-NEXT: addl $16, %esp
-; X32-SSE2-NEXT: movl %eax, %ebx
-; X32-SSE2-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: movl %ebp, %edx
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl %cl, %edx, %eax
-; X32-SSE2-NEXT: xorl %ecx, %ecx
-; X32-SSE2-NEXT: testb $32, %bl
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: cmovnel %ecx, %ebp
-; X32-SSE2-NEXT: xorl %edx, %edx
-; X32-SSE2-NEXT: movb $37, %cl
-; X32-SSE2-NEXT: subb %bl, %cl
-; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
-; X32-SSE2-NEXT: shrl %cl, %esi
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: notb %cl
+; X32-SSE2-NEXT: shrdl %cl, %edi, %ebx
+; X32-SSE2-NEXT: shrl %cl, %edi
+; X32-SSE2-NEXT: xorl %eax, %eax
 ; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %edi
-; X32-SSE2-NEXT: cmovnel %edx, %esi
-; X32-SSE2-NEXT: orl %eax, %esi
-; X32-SSE2-NEXT: orl %ebp, %edi
-; X32-SSE2-NEXT: orl %ebx, (%esp) # 4-byte Folded Spill
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: cmovnel %edi, %ebx
+; X32-SSE2-NEXT: cmovnel %eax, %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll %cl, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-SSE2-NEXT: shldl %cl, %ebp, %esi
+; X32-SSE2-NEXT: testb $32, %dl
+; X32-SSE2-NEXT: cmovnel %eax, %esi
+; X32-SSE2-NEXT: movl $0, %ecx
+; X32-SSE2-NEXT: cmovnel %ecx, %eax
+; X32-SSE2-NEXT: orl %ebx, %eax
+; X32-SSE2-NEXT: orl %edi, %esi
 ; X32-SSE2-NEXT: movl %esi, %edx
-; X32-SSE2-NEXT: addl $4, %esp
 ; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: popl %edi
 ; X32-SSE2-NEXT: popl %ebx
@@ -93,28 +87,18 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ;
 ; X64-AVX2-LABEL: fshl_i37:
 ; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: movq %rdx, %r8
-; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
-; X64-AVX2-NEXT: andq %rax, %rsi
-; X64-AVX2-NEXT: andq %rax, %r8
-; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rcx # imm = 0xDD67C8A60DD67C8B
-; X64-AVX2-NEXT: movq %r8, %rax
-; X64-AVX2-NEXT: mulq %rcx
+; X64-AVX2-NEXT: movq %rdx, %rcx
+; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: mulq %rdx
 ; X64-AVX2-NEXT: shrq $5, %rdx
-; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
-; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
-; X64-AVX2-NEXT: subq %rax, %r8
+; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
+; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
+; X64-AVX2-NEXT: subl %eax, %ecx
+; X64-AVX2-NEXT: shlq $27, %rsi
+; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT: shldq %cl, %rsi, %rdi
 ; X64-AVX2-NEXT: movq %rdi, %rax
-; X64-AVX2-NEXT: movl %r8d, %ecx
-; X64-AVX2-NEXT: shlq %cl, %rax
-; X64-AVX2-NEXT: movl $37, %ecx
-; X64-AVX2-NEXT: subl %r8d, %ecx
-; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shrq %cl, %rsi
-; X64-AVX2-NEXT: orq %rax, %rsi
-; X64-AVX2-NEXT: testq %r8, %r8
-; X64-AVX2-NEXT: cmoveq %rdi, %rsi
-; X64-AVX2-NEXT: movq %rsi, %rax
 ; X64-AVX2-NEXT: retq
 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
 ret i37 %f
@@ -235,41 +219,41 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ; X32-SSE2-NEXT: pushl %ebx
 ; X32-SSE2-NEXT: pushl %edi
 ; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: andl $31, %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: andl $31, %eax
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: shldl $1, %edi, %esi
+; X32-SSE2-NEXT: addl %edi, %edi
 ; X32-SSE2-NEXT: pushl $0
 ; X32-SSE2-NEXT: pushl $37
-; X32-SSE2-NEXT: pushl %eax
+; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
 ; X32-SSE2-NEXT: calll __umoddi3
 ; X32-SSE2-NEXT: addl $16, %esp
-; X32-SSE2-NEXT: movl %eax, %ebx
-; X32-SSE2-NEXT: movb $37, %cl
-; X32-SSE2-NEXT: subb %bl, %cl
-; X32-SSE2-NEXT: movl %ebp, %eax
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: shldl %cl, %eax, %edi
+; X32-SSE2-NEXT: addb $27, %al
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: notb %dl
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shldl %cl, %edi, %esi
+; X32-SSE2-NEXT: shldl $27, %ebp, %ebx
+; X32-SSE2-NEXT: shll $27, %ebp
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrdl %cl, %ebx, %ebp
+; X32-SSE2-NEXT: shrl %cl, %ebx
+; X32-SSE2-NEXT: xorl %ecx, %ecx
+; X32-SSE2-NEXT: testb $32, %al
+; X32-SSE2-NEXT: cmovnel %ebx, %ebp
+; X32-SSE2-NEXT: cmovnel %ecx, %ebx
 ; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebp, %edi
-; X32-SSE2-NEXT: cmovnel %eax, %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: shrdl %cl, %esi, %eax
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: testb $32, %bl
-; X32-SSE2-NEXT: cmovnel %esi, %eax
-; X32-SSE2-NEXT: movl $0, %ecx
-; X32-SSE2-NEXT: cmovnel %ecx, %esi
-; X32-SSE2-NEXT: orl %edi, %esi
-; X32-SSE2-NEXT: orl %ebp, %eax
-; X32-SSE2-NEXT: orl %ebx, %edx
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll %cl, %edi
+; X32-SSE2-NEXT: testb $32, %dl
+; X32-SSE2-NEXT: cmovnel %edi, %esi
+; X32-SSE2-NEXT: cmovnel %eax, %edi
+; X32-SSE2-NEXT: orl %ebp, %edi
+; X32-SSE2-NEXT: orl %ebx, %esi
+; X32-SSE2-NEXT: movl %edi, %eax
 ; X32-SSE2-NEXT: movl %esi, %edx
 ; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: popl %edi
@@ -279,28 +263,19 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ;
 ; X64-AVX2-LABEL: fshr_i37:
 ; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: movq %rdx, %r8
-; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
-; X64-AVX2-NEXT: movq %rsi, %r9
-; X64-AVX2-NEXT: andq %rax, %r9
-; X64-AVX2-NEXT: andq %rax, %r8
-; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rcx # imm = 0xDD67C8A60DD67C8B
-; X64-AVX2-NEXT: movq %r8, %rax
-; X64-AVX2-NEXT: mulq %rcx
+; X64-AVX2-NEXT: movq %rdx, %rcx
+; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
+; X64-AVX2-NEXT: movq %rcx, %rax
+; X64-AVX2-NEXT: mulq %rdx
 ; X64-AVX2-NEXT: shrq $5, %rdx
-; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
-; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
-; X64-AVX2-NEXT: subq %rax, %r8
-; X64-AVX2-NEXT: movl %r8d, %ecx
-; X64-AVX2-NEXT: shrq %cl, %r9
-; X64-AVX2-NEXT: movl $37, %ecx
-; X64-AVX2-NEXT: subl %r8d, %ecx
-; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shlq %cl, %rdi
-; X64-AVX2-NEXT: orq %r9, %rdi
-; X64-AVX2-NEXT: testq %r8, %r8
-; X64-AVX2-NEXT: cmoveq %rsi, %rdi
-; X64-AVX2-NEXT: movq %rdi, %rax
+; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
+; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
+; X64-AVX2-NEXT: subl %eax, %ecx
+; X64-AVX2-NEXT: addl $27, %ecx
+; X64-AVX2-NEXT: shlq $27, %rsi
+; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT: shrdq %cl, %rdi, %rsi
+; X64-AVX2-NEXT: movq %rsi, %rax
 ; X64-AVX2-NEXT: retq
 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
 ret i37 %f
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
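For orientation, here is a minimal C model of the llvm.fshl/llvm.fshr semantics the i37 tests above exercise. It is an editorial sketch, not code from this patch: the helper names and the 64-bit carrier type are assumptions. Because N = 37 is not a power of two, the shift amount has to be reduced modulo N rather than masked, which is why the 32-bit lowerings above call __umoddi3.

    #include <stdint.h>

    /* Model of llvm.fshl.iN / llvm.fshr.iN for N <= 64; values live in
       the low N bits of a uint64_t. */
    static uint64_t lowbits(unsigned n) {
        return (n == 64) ? ~0ull : ((1ull << n) - 1);
    }

    /* fshl: concatenate x:y (x in the high half), shift left by z % n,
       and return the top n bits. A zero shift returns x. */
    uint64_t fshl(uint64_t x, uint64_t y, uint64_t z, unsigned n) {
        unsigned s = (unsigned)(z % n);
        if (s == 0)
            return x & lowbits(n);
        return ((x << s) | ((y & lowbits(n)) >> (n - s))) & lowbits(n);
    }

    /* fshr: same concatenation, shift right by z % n, and return the
       bottom n bits. A zero shift returns y. */
    uint64_t fshr(uint64_t x, uint64_t y, uint64_t z, unsigned n) {
        unsigned s = (unsigned)(z % n);
        if (s == 0)
            return y & lowbits(n);
        return (((y & lowbits(n)) >> s) | (x << (n - s))) & lowbits(n);
    }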
@@ -1256,16 +1256,24 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ;
 ; X32-SSE-LABEL: constant_funnnel_v2i64:
 ; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u>
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: psubq %xmm2, %xmm3
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psllq %xmm2, %xmm4
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psllq %xmm2, %xmm5
+; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
+; X32-SSE-NEXT: pand %xmm1, %xmm3
 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq $60, %xmm1
-; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: psrlq $50, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq $4, %xmm1
-; X32-SSE-NEXT: psllq $14, %xmm0
+; X32-SSE-NEXT: psrlq %xmm3, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
+; X32-SSE-NEXT: psrlq %xmm2, %xmm0
 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-SSE-NEXT: orpd %xmm2, %xmm0
+; X32-SSE-NEXT: orpd %xmm5, %xmm0
 ; X32-SSE-NEXT: retl
 %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
 ret <2 x i64> %res
@@ -1657,8 +1665,10 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ; X32-SSE: # %bb.0:
 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
 ; X32-SSE-NEXT: psrlq $50, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; X32-SSE-NEXT: psllq $14, %xmm0
-; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
+; X32-SSE-NEXT: orpd %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
 %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
 ret <2 x i64> %res
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
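The vector tests in this area call llvm.fshl/llvm.fshr with both value operands equal, i.e. plain rotates. A minimal C sketch of the identity the checked SSE/AVX sequences implement, splitting a rotate into a left shift, a right shift by the complement, and an OR (the 64-bit element width is illustrative, not the only one tested):

    #include <stdint.h>

    /* rotl as two shifts plus an OR, mirroring the psllq/psrlq/por and
       vpsllvq/vpsrlvq/vpor patterns in the checked assembly. Masking the
       right-shift count with 63 keeps the s == 0 case well defined. */
    uint64_t rotl64(uint64_t x, uint64_t amt) {
        unsigned s = (unsigned)(amt & 63);
        return (x << s) | (x >> ((64 - s) & 63));
    }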
@@ -80,7 +80,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
-; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_funnnel_v2i64:
@@ -92,7 +92,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
 ; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: var_funnnel_v2i64:
@@ -592,7 +592,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
 ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm3, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT: vzeroupper
 ; AVX512F-NEXT: retq
@@ -609,7 +609,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
 ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpord %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
@@ -626,7 +626,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512BW-NEXT: vzeroupper
@@ -644,7 +644,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
 ; AVX512VLBW-NEXT: vzeroupper
 ; AVX512VLBW-NEXT: retq
@@ -731,7 +731,7 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
 ; AVX-NEXT: vpsubq %xmm1, %xmm4, %xmm1
 ; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm3, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v2i64:
@@ -1142,7 +1142,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm3, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0
 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT: vzeroupper
 ; AVX512F-NEXT: retq
@@ -1159,7 +1159,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpord %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
@@ -1176,7 +1176,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512BW-NEXT: vzeroupper
@@ -1194,7 +1194,7 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind
 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
 ; AVX512VLBW-NEXT: vzeroupper
 ; AVX512VLBW-NEXT: retq
@@ -1258,13 +1258,13 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ; SSE2-LABEL: constant_funnnel_v2i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $4, %xmm1
+; SSE2-NEXT: psllq $60, %xmm1
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrlq $14, %xmm2
+; SSE2-NEXT: psllq $50, %xmm2
 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psllq $60, %xmm1
-; SSE2-NEXT: psllq $50, %xmm0
+; SSE2-NEXT: psrlq $4, %xmm1
+; SSE2-NEXT: psrlq $14, %xmm0
 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; SSE2-NEXT: orpd %xmm2, %xmm0
 ; SSE2-NEXT: retq
@@ -1272,32 +1272,32 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ; SSE41-LABEL: constant_funnnel_v2i64:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlq $14, %xmm1
+; SSE41-NEXT: psllq $50, %xmm1
 ; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlq $4, %xmm2
+; SSE41-NEXT: psllq $60, %xmm2
 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psllq $50, %xmm1
-; SSE41-NEXT: psllq $60, %xmm0
+; SSE41-NEXT: psrlq $14, %xmm1
+; SSE41-NEXT: psrlq $4, %xmm0
 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
 ; SSE41-NEXT: por %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX1-LABEL: constant_funnnel_v2i64:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm2
+; AVX1-NEXT: vpsllq $50, %xmm0, %xmm1
+; AVX1-NEXT: vpsllq $60, %xmm0, %xmm2
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpsllq $50, %xmm0, %xmm2
-; AVX1-NEXT: vpsllq $60, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm0
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: constant_funnnel_v2i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -1336,16 +1336,24 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ;
 ; X32-SSE-LABEL: constant_funnnel_v2i64:
 ; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u>
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: psubq %xmm2, %xmm3
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psrlq %xmm2, %xmm4
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psrlq %xmm2, %xmm5
+; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
+; X32-SSE-NEXT: pand %xmm1, %xmm3
 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq $4, %xmm1
-; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: psrlq $14, %xmm2
-; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq $60, %xmm1
-; X32-SSE-NEXT: psllq $50, %xmm0
+; X32-SSE-NEXT: psllq %xmm3, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
+; X32-SSE-NEXT: psllq %xmm2, %xmm0
 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-SSE-NEXT: orpd %xmm2, %xmm0
+; X32-SSE-NEXT: orpd %xmm5, %xmm0
 ; X32-SSE-NEXT: retl
 %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
 ret <2 x i64> %res
@@ -1610,8 +1618,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ; AVX512F-LABEL: constant_funnnel_v16i8:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm1
-; AVX512F-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm1
+; AVX512F-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT: vzeroupper
@@ -1620,8 +1628,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ; AVX512VL-LABEL: constant_funnnel_v16i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm1
+; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT: vzeroupper
@@ -1629,11 +1637,11 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512BW-LABEL: constant_funnnel_v16i8:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
-; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
+; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -1643,8 +1651,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ; AVX512VLBW-LABEL: constant_funnnel_v16i8:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0
 ; AVX512VLBW-NEXT: vzeroupper
@@ -1690,15 +1698,15 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ; SSE-LABEL: splatconstant_funnnel_v2i64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrlq $14, %xmm1
-; SSE-NEXT: psllq $50, %xmm0
+; SSE-NEXT: psllq $50, %xmm1
+; SSE-NEXT: psrlq $14, %xmm0
 ; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: splatconstant_funnnel_v2i64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpsrlq $14, %xmm0, %xmm1
-; AVX-NEXT: vpsllq $50, %xmm0, %xmm0
+; AVX-NEXT: vpsllq $50, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq $14, %xmm0, %xmm0
 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
@@ -1736,9 +1744,11 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
 ; X32-SSE-LABEL: splatconstant_funnnel_v2i64:
 ; X32-SSE: # %bb.0:
 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq $14, %xmm1
-; X32-SSE-NEXT: psllq $50, %xmm0
-; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: psllq $50, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
+; X32-SSE-NEXT: psrlq $14, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
+; X32-SSE-NEXT: orpd %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
 %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
 ret <2 x i64> %res
@@ -1863,33 +1873,33 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
 ;
 ; AVX512F-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
 ; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512VLBW-NEXT: retq
 ;
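The 256-bit rotate-right tests below typically negate the amount first (a vpxor/vpsub pair or a vector psub from zero) and then rotate left. A hedged scalar sketch of that identity, with a 64-bit width standing in for the various element types:

    #include <stdint.h>

    /* rotr(x, amt) == rotl(x, -amt) modulo the bit width, which is why
       the lowerings below compute 0 - amt, mask it, and shift left. */
    uint64_t rotr64(uint64_t x, uint64_t amt) {
        unsigned s = (unsigned)((0 - amt) & 63);   /* negate, then mask */
        return (x << s) | (x >> ((64 - s) & 63));
    }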
@@ -48,7 +48,7 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
 ; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm3, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: var_funnnel_v4i64:
@@ -60,7 +60,7 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind {
 ; AVX2-NEXT: vpsubq %ymm1, %ymm4, %ymm1
 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: var_funnnel_v4i64:
@@ -506,7 +506,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
 ; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
@@ -522,7 +522,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
 ; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
@@ -577,7 +577,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 ; AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm3
 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
@@ -589,7 +589,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 ; AVX2-NEXT: vpsubq %xmm1, %xmm4, %xmm1
 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v4i64:
@@ -622,9 +622,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
 ; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
@@ -649,9 +649,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind {
 ; AVX1-LABEL: splatvar_funnnel_v8i32:
 ; AVX1: # %bb.0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -669,9 +669,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v8i32:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
 ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
@@ -713,9 +713,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
 ; XOPAVX1: # %bb.0:
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
@@ -740,10 +740,10 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
 define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind {
 ; AVX1-LABEL: splatvar_funnnel_v16i16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -761,9 +761,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX2-LABEL: splatvar_funnnel_v16i16:
 ; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -776,9 +776,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX512-LABEL: splatvar_funnnel_v16i16:
 ; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1
-; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
@@ -791,10 +791,10 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
 ; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
|
@ -820,8 +820,8 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
||||||
; AVX1-LABEL: splatvar_funnnel_v32i8:
|
; AVX1-LABEL: splatvar_funnnel_v32i8:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
||||||
|
; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||||
|
@ -848,9 +848,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: splatvar_funnnel_v32i8:
|
; AVX2-LABEL: splatvar_funnnel_v32i8:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
|
||||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||||
; AVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
|
; AVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
|
||||||
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||||
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
|
||||||
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
|
; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
|
||||||
|
@@ -871,9 +871,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
 ; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT: vpsubb %xmm1, %xmm2, %xmm1
-; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3
@@ -894,9 +894,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
 ; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm2, %xmm1
-; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1
 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3
@@ -927,7 +927,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
@@ -944,15 +944,15 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
@@ -982,18 +982,18 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
 ; AVX1-LABEL: constant_funnnel_v4i64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrlq $60, %xmm1, %xmm2
+; AVX1-NEXT: vpsllq $4, %xmm1, %xmm2
-; AVX1-NEXT: vpsrlq $50, %xmm1, %xmm3
+; AVX1-NEXT: vpsllq $14, %xmm1, %xmm3
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm3
+; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3
-; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm4
+; AVX1-NEXT: vpsllq $60, %xmm0, %xmm4
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
-; AVX1-NEXT: vpsllq $4, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlq $60, %xmm1, %xmm3
-; AVX1-NEXT: vpsllq $14, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq $50, %xmm1, %xmm1
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; AVX1-NEXT: vpsllq $50, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm3
-; AVX1-NEXT: vpsllq $60, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlq $4, %xmm0, %xmm0
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
@@ -1001,8 +1001,8 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x) nounwind {
 ;
 ; AVX2-LABEL: constant_funnnel_v4i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -1324,8 +1324,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ; AVX512BW-LABEL: constant_funnnel_v32i8:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
+; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512BW-NEXT: retq
@@ -1333,8 +1333,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ; AVX512VLBW-LABEL: constant_funnnel_v32i8:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
-; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0
 ; AVX512VLBW-NEXT: retq
@@ -1367,20 +1367,20 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind {
 define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x) nounwind {
 ; AVX1-LABEL: splatconstant_funnnel_v4i64:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm1
+; AVX1-NEXT: vpsllq $50, %xmm0, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsrlq $14, %xmm2, %xmm3
+; AVX1-NEXT: vpsllq $50, %xmm2, %xmm3
 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX1-NEXT: vpsllq $50, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlq $14, %xmm0, %xmm0
-; AVX1-NEXT: vpsllq $50, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlq $14, %xmm2, %xmm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: splatconstant_funnnel_v4i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlq $14, %ymm0, %ymm1
+; AVX2-NEXT: vpsllq $50, %ymm0, %ymm1
-; AVX2-NEXT: vpsllq $50, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $14, %ymm0, %ymm0
 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -1581,17 +1581,17 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1
+; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
-; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
@@ -60,7 +60,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_funnnel_v32i16:
@@ -91,7 +91,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: var_funnnel_v32i16:
@@ -103,7 +103,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
 ; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: var_funnnel_v32i16:
@@ -115,7 +115,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1
 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1
 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %amt)
 ret <32 x i16> %res
@@ -187,7 +187,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_funnnel_v64i8:
@@ -255,7 +255,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: var_funnnel_v64i8:
@@ -293,7 +293,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
-; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: var_funnnel_v64i8:
@@ -331,7 +331,7 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512VLBW-NEXT: vpaddb %zmm2, %zmm2, %zmm1
 ; AVX512VLBW-NEXT: vpmovb2m %zmm1, %k1
 ; AVX512VLBW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
-; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %amt)
 ret <64 x i8> %res
@@ -385,7 +385,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ; AVX512F-NEXT: vpslld %xmm1, %zmm2, %zmm1
 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i16:
@@ -409,7 +409,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ; AVX512VL-NEXT: vpslld %xmm1, %zmm2, %zmm1
 ; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatvar_funnnel_v32i16:
@@ -424,7 +424,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
@@ -439,7 +439,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: retq
 %splat = shufflevector <32 x i16> %amt, <32 x i16> undef, <32 x i32> zeroinitializer
 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> %splat)
@@ -536,45 +536,45 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ; AVX512BW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512BW-NEXT: vpsubb %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm4
 ; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512BW-NEXT: vpsllw %xmm2, %xmm5, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
-; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX512BW-NEXT: vpandq %zmm2, %zmm4, %zmm2
+; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3
-; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3
+; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
+; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
-; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
+; AVX512BW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3
-; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4
-; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm4
 ; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm5, %xmm2
+; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3
-; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
+; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX512VLBW-NEXT: vpandq %zmm2, %zmm4, %zmm2
+; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3
-; AVX512VLBW-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3
+; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1
+; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3
+; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm2
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0
+; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm0
-; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0
 ; AVX512VLBW-NEXT: retq
 %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer
 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat)
@@ -619,7 +619,7 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1
 ; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v32i16:
@@ -637,20 +637,20 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512VL-NEXT: vpmullw %ymm3, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v32i16:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
+; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: constant_funnnel_v32i16:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
@@ -704,7 +704,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v64i8:
@@ -753,7 +753,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v64i8:
@@ -780,7 +780,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpackuswb %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: constant_funnnel_v64i8:
@@ -807,7 +807,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ; AVX512VLBW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vpackuswb %zmm3, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm2, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
 ret <64 x i8> %res
@@ -838,39 +838,39 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x) nounwind {
 define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512F-LABEL: splatconstant_funnnel_v32i16:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm1
 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm3
+; AVX512F-NEXT: vpsllw $9, %ymm2, %ymm3
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $9, %ymm2, %ymm2
+; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm2
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v32i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm1
 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $9, %ymm2, %ymm3
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllw $9, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm2
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v32i16:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %x, <32 x i16> %x, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
@@ -880,39 +880,39 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
 define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
 ; AVX512F-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1
 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm3
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3
+; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm3
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512VLBW-NEXT: retq
 %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
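For readers skimming these checks: every test above passes the same value as both data operands of llvm.fshr, which turns the funnel shift into a rotate, so fshr(x, x, n) equals (x lshr n) | (x shl (bw - n)) with n reduced modulo the bit width. That is why the checks pair a right shift by n with a left shift by bw - n (e.g. vpsrlw $7 with vpsllw $9 for i16 elements). A minimal scalar sketch of the pattern these tests exercise (a hypothetical example, not one of the tests in this diff):

```llvm
declare i16 @llvm.fshr.i16(i16, i16, i16)

; fshr(x, x, 7) on i16 is a rotate right by 7:
; (x >> 7) | (x << 9), matching the vpsrlw $7 / vpsllw $9 pairs above.
define i16 @fshr_as_rotate(i16 %x) {
  %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 7)
  ret i16 %r
}
```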