forked from OSchip/llvm-project
[SelectionDAG] Add initial implementation of TargetLowering::SimplifyDemandedVectorElts
This is mainly a move of simplifyShuffleOperands from DAGCombiner::visitVECTOR_SHUFFLE to create a more general purpose TargetLowering::SimplifyDemandedVectorElts implementation. Further features can be moved/added in future patches. Differential Revision: https://reviews.llvm.org/D42896 llvm-svn: 325232
This commit is contained in:
parent
9430c8cd1c
commit
80663ee986
|
@ -2707,6 +2707,30 @@ public:
|
|||
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
|
||||
/// Look at Vector Op. At this point, we know that only the DemandedElts
|
||||
/// elements of the result of Op are ever used downstream. If we can use
|
||||
/// this information to simplify Op, create a new simplified DAG node and
|
||||
/// return true, storing the original and new nodes in TLO.
|
||||
/// Otherwise, analyze the expression and return a mask of KnownUndef and
|
||||
/// KnownZero elements for the expression (used to simplify the caller).
|
||||
/// The KnownUndef/Zero elements may only be accurate for those bits
|
||||
/// in the DemandedElts mask.
|
||||
/// \p AssumeSingleUse When this parameter is true, this function will
|
||||
/// attempt to simplify \p Op even if there are multiple uses.
|
||||
/// Callers are responsible for correctly updating the DAG based on the
|
||||
/// results of this function, because simply replacing TLO.Old
|
||||
/// with TLO.New will be incorrect when this parameter is true and TLO.Old
|
||||
/// has multiple uses.
|
||||
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
|
||||
APInt &KnownUndef, APInt &KnownZero,
|
||||
TargetLoweringOpt &TLO, unsigned Depth = 0,
|
||||
bool AssumeSingleUse = false) const;
|
||||
|
||||
/// Helper wrapper around SimplifyDemandedVectorElts
|
||||
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
|
||||
APInt &KnownUndef, APInt &KnownZero,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
|
||||
/// Determine which of the bits specified in Mask are known to be either zero
|
||||
/// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
|
||||
/// argument allows us to only collect the known bits that are shared by the
|
||||
|
@ -2735,6 +2759,15 @@ public:
|
|||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
/// Attempt to simplify any target nodes based on the demanded vector
|
||||
/// elements, returning true on success. Otherwise, analyze the expression and
|
||||
/// return a mask of KnownUndef and KnownZero elements for the expression
|
||||
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
|
||||
/// accurate for those elements in the DemandedElts mask.
|
||||
virtual bool SimplifyDemandedVectorEltsForTargetNode(
|
||||
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
|
||||
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
|
||||
|
||||
struct DAGCombinerInfo {
|
||||
void *DC; // The DAG Combiner object.
|
||||
CombineLevel Level;
|
||||
|
|
|
@ -232,7 +232,17 @@ namespace {
|
|||
return SimplifyDemandedBits(Op, Demanded);
|
||||
}
|
||||
|
||||
/// Check the specified vector node value to see if it can be simplified or
|
||||
/// if things it uses can be simplified as it only uses some of the
|
||||
/// elements. If so, return true.
|
||||
bool SimplifyDemandedVectorElts(SDValue Op) {
  // No caller-supplied mask: treat every element of Op as demanded and
  // forward to the two-argument overload.
  const unsigned LaneCount = Op.getValueType().getVectorNumElements();
  const APInt AllLanes = APInt::getAllOnesValue(LaneCount);
  return SimplifyDemandedVectorElts(Op, AllLanes);
}
|
||||
|
||||
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
|
||||
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
|
||||
|
||||
bool CombineToPreIndexedLoadStore(SDNode *N);
|
||||
bool CombineToPostIndexedLoadStore(SDNode *N);
|
||||
|
@ -1085,6 +1095,28 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Check the specified vector node value to see if it can be simplified or
|
||||
/// if things it uses can be simplified as it only uses some of the elements.
|
||||
/// If so, return true.
|
||||
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
|
||||
const APInt &Demanded) {
|
||||
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
|
||||
APInt KnownUndef, KnownZero;
|
||||
if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
|
||||
return false;
|
||||
|
||||
// Revisit the node.
|
||||
AddToWorklist(Op.getNode());
|
||||
|
||||
// Replace the old value with the new one.
|
||||
++NodesCombined;
|
||||
DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
|
||||
dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); dbgs() << '\n');
|
||||
|
||||
CommitTargetLoweringOpt(TLO);
|
||||
return true;
|
||||
}
|
||||
|
||||
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
|
||||
SDLoc DL(Load);
|
||||
EVT VT = Load->getValueType(0);
|
||||
|
@ -15558,92 +15590,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Walk the CONCAT_VECTORS / INSERT_SUBVECTOR nodes that build up \p V and
/// replace each piece none of whose elements are marked in \p UsedElements
/// with UNDEF. Returns \p V unchanged when its opcode is not handled or when
/// nothing could be simplified.
///
/// Note that the INSERT_SUBVECTOR case clears the bits of \p UsedElements that
/// are covered by the inserted subvector before recursing into the base.
static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
                                                 SDValue V, SelectionDAG &DAG) {
  SDLoc DL(V);
  EVT VT = V.getValueType();

  switch (V.getOpcode()) {
  default:
    return V;

  case ISD::CONCAT_VECTORS: {
    EVT OpVT = V->getOperand(0).getValueType();
    int OpSize = OpVT.getVectorNumElements();
    SmallBitVector OpUsedElements(OpSize, false);
    bool FoundSimplification = false;
    SmallVector<SDValue, 4> NewOps;
    NewOps.reserve(V->getNumOperands());
    for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
      SDValue Op = V->getOperand(i);
      // Project the used-element mask onto this operand's slice.
      bool OpUsed = false;
      for (int j = 0; j < OpSize; ++j)
        if (UsedElements[i * OpSize + j]) {
          OpUsedElements[j] = true;
          OpUsed = true;
        }
      NewOps.push_back(
          OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
                 : DAG.getUNDEF(OpVT));
      // An operand only counts as simplified if it was actually replaced.
      FoundSimplification |= Op != NewOps.back();
      OpUsedElements.reset();
    }
    if (FoundSimplification)
      V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
    return V;
  }

  case ISD::INSERT_SUBVECTOR: {
    SDValue BaseV = V->getOperand(0);
    SDValue SubV = V->getOperand(1);
    // Only constant insertion indices are handled.
    auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
    if (!IdxN)
      return V;

    int SubSize = SubV.getValueType().getVectorNumElements();
    int Idx = IdxN->getZExtValue();
    bool SubVectorUsed = false;
    SmallBitVector SubUsedElements(SubSize, false);
    for (int i = 0; i < SubSize; ++i)
      if (UsedElements[i + Idx]) {
        SubVectorUsed = true;
        SubUsedElements[i] = true;
        // These elements come from SubV, so the base no longer needs them.
        UsedElements[i + Idx] = false;
      }

    // Now recurse on both the base and sub vectors.
    SDValue SimplifiedSubV =
        SubVectorUsed
            ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
            : DAG.getUNDEF(SubV.getValueType());
    SDValue SimplifiedBaseV =
        simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
    if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
      V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                      SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
    return V;
  }
  }
}
|
||||
|
||||
/// Work out which elements of \p N0 and \p N1 the shuffle \p SVN actually
/// reads, simplify each operand against that set, and return a new shuffle
/// over the simplified operands. Returns an empty SDValue when neither
/// operand changed.
static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
                                       SDValue N1, SelectionDAG &DAG) {
  const EVT ResultVT = SVN->getValueType(0);
  const int EltCount = ResultVT.getVectorNumElements();

  SmallBitVector UsedFromN0(EltCount, false);
  SmallBitVector UsedFromN1(EltCount, false);
  for (int MaskIdx : SVN->getMask()) {
    // Negative mask entries reference neither operand.
    if (MaskIdx < 0)
      continue;
    if (MaskIdx < EltCount)
      UsedFromN0[MaskIdx] = true;
    else
      UsedFromN1[MaskIdx - EltCount] = true;
  }

  SDValue NewN0 = simplifyShuffleOperandRecursively(UsedFromN0, N0, DAG);
  SDValue NewN1 = simplifyShuffleOperandRecursively(UsedFromN1, N1, DAG);
  const bool Changed = NewN0 != N0 || NewN1 != N1;
  if (!Changed)
    return SDValue();

  return DAG.getVectorShuffle(ResultVT, SDLoc(SVN), NewN0, NewN1,
                              SVN->getMask());
}
|
||||
|
||||
static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
|
||||
SDValue N1, SelectionDAG &DAG) {
|
||||
auto isUndefElt = [](SDValue V, int Idx) {
|
||||
|
@ -16181,11 +16127,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||
}
|
||||
}
|
||||
|
||||
// There are various patterns used to build up a vector from smaller vectors,
|
||||
// subvectors, or elements. Scan chains of these and replace unused insertions
|
||||
// or components with undef.
|
||||
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
|
||||
return S;
|
||||
// Simplify source operands based on shuffle mask.
|
||||
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
|
||||
return SDValue(N, 0);
|
||||
|
||||
// Match shuffles that can be converted to any_vector_extend_in_reg.
|
||||
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
|
||||
|
|
|
@ -1279,6 +1279,197 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Convenience overload for combine callbacks: builds a TargetLoweringOpt from
/// the legalization state reported by \p DCI, runs the main
/// SimplifyDemandedVectorElts, and commits the result through \p DCI when a
/// simplification was found.
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  const bool TypesLegal = !DCI.isBeforeLegalize();
  const bool OpsLegal = !DCI.isBeforeLegalizeOps();
  TargetLoweringOpt TLO(DCI.DAG, TypesLegal, OpsLegal);

  if (!SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                  TLO))
    return false;

  DCI.CommitTargetLoweringOpt(TLO);
  return true;
}
|
||||
|
||||
/// Try to simplify the vector value \p Op given that only the elements set in
/// \p DemandedEltMask are used.
///
/// When a replacement is recorded via TLO.CombineTo its result is returned,
/// and the recursion unwinds immediately with that value. Otherwise the
/// function returns false and reports, through \p KnownUndef and
/// \p KnownZero, which elements of \p Op were found to be undef or zero.
///
/// Unless \p AssumeSingleUse is set, a node with more than one use is treated
/// as if all of its elements were demanded. Recursion stops once \p Depth
/// reaches 6.
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = DemandedEltMask;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");
  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= 6)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    // Only element 0 carries a value; if it is unused the node is dead.
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    // Record which operands are undef or a (non-truncated) zero constant.
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    // Recurse into each operand with its slice of the demanded mask and
    // stitch the per-operand results back together.
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Only constant insertion indices are handled.
    if (!isa<ConstantSDNode>(Op.getOperand(2)))
      break;
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    APInt Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
    // Idx == NumElts - NumSubElts is still in range (the subvector then
    // occupies the top elements); only larger indices must be rejected.
    if (Idx.ugt(NumElts - NumSubElts))
      break;
    unsigned SubIdx = Idx.getZExtValue();
    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;
    // The elements overwritten by Sub are not demanded from the base vector.
    APInt BaseElts = DemandedElts;
    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, SubIdx);
    KnownZero.insertBits(SubZero, SubIdx);
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  default: {
    // Opcodes at or above BUILTIN_OP_END are handed to the target hook.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    break;
  }
  }

  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
  return false;
}
|
||||
|
||||
/// Determine which of the bits specified in Mask are known to be either zero or
|
||||
/// one and return them in the Known.
|
||||
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
|
||||
|
@ -1323,6 +1514,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/// Default implementation of the target hook: checks that \p Op is a target
/// node or an intrinsic and reports that no simplification was made.
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  const unsigned Opc = Op.getOpcode();
  const bool IsTargetOrIntrinsic =
      Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN ||
      Opc == ISD::INTRINSIC_W_CHAIN || Opc == ISD::INTRINSIC_VOID;
  assert(IsTargetOrIntrinsic &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  (void)IsTargetOrIntrinsic; // Only read by the assert.
  return false;
}
|
||||
|
||||
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
|
||||
// work with truncating build vectors and vectors with elements of less than
|
||||
// 8 bits.
|
||||
|
|
|
@ -50,40 +50,40 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
|
|||
;
|
||||
; MIPS32R5EB-LABEL: i8_2:
|
||||
; MIPS32R5EB: # %bb.0:
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, -16
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 16
|
||||
; MIPS32R5EB-NEXT: sw $5, 8($sp)
|
||||
; MIPS32R5EB-NEXT: sw $4, 12($sp)
|
||||
; MIPS32R5EB-NEXT: ldi.b $w0, 0
|
||||
; MIPS32R5EB-NEXT: lbu $1, 9($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 8($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w1, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 12($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[0], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 10($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 13($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[1], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 11($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 14($sp)
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 15($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[3], $1
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, -48
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48
|
||||
; MIPS32R5EB-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill
|
||||
; MIPS32R5EB-NEXT: .cfi_offset 30, -4
|
||||
; MIPS32R5EB-NEXT: move $fp, $sp
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
|
||||
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
|
||||
; MIPS32R5EB-NEXT: and $sp, $sp, $1
|
||||
; MIPS32R5EB-NEXT: sw $5, 36($sp)
|
||||
; MIPS32R5EB-NEXT: sw $4, 40($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 37($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 20($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 36($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 16($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 40($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 41($sp)
|
||||
; MIPS32R5EB-NEXT: sw $2, 4($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 0($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w0, 16($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w0, $w0, $w0
|
||||
; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 0($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
|
||||
; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
|
||||
; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1]
|
||||
; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3]
|
||||
; MIPS32R5EB-NEXT: sb $2, 5($sp)
|
||||
; MIPS32R5EB-NEXT: sb $1, 4($sp)
|
||||
; MIPS32R5EB-NEXT: lhu $2, 4($sp)
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, 16
|
||||
; MIPS32R5EB-NEXT: sb $2, 33($sp)
|
||||
; MIPS32R5EB-NEXT: sb $1, 32($sp)
|
||||
; MIPS32R5EB-NEXT: lhu $2, 32($sp)
|
||||
; MIPS32R5EB-NEXT: move $sp, $fp
|
||||
; MIPS32R5EB-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, 48
|
||||
; MIPS32R5EB-NEXT: jr $ra
|
||||
; MIPS32R5EB-NEXT: nop
|
||||
;
|
||||
|
@ -179,37 +179,37 @@ define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
|
|||
;
|
||||
; MIPS32R5EL-LABEL: i8_2:
|
||||
; MIPS32R5EL: # %bb.0:
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, -16
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 16
|
||||
; MIPS32R5EL-NEXT: sw $5, 8($sp)
|
||||
; MIPS32R5EL-NEXT: sw $4, 12($sp)
|
||||
; MIPS32R5EL-NEXT: ldi.b $w0, 0
|
||||
; MIPS32R5EL-NEXT: lbu $1, 9($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 12($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $3, 8($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w1, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[0], $3
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 10($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 11($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 13($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 14($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 15($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[3], $1
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, -48
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48
|
||||
; MIPS32R5EL-NEXT: sw $fp, 44($sp) # 4-byte Folded Spill
|
||||
; MIPS32R5EL-NEXT: .cfi_offset 30, -4
|
||||
; MIPS32R5EL-NEXT: move $fp, $sp
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
|
||||
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
|
||||
; MIPS32R5EL-NEXT: and $sp, $sp, $1
|
||||
; MIPS32R5EL-NEXT: sw $5, 36($sp)
|
||||
; MIPS32R5EL-NEXT: sw $4, 40($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 37($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 20($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 36($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 16($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 41($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 4($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 40($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 0($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w0, 16($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w0, $w0, $w0
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 0($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
|
||||
; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
|
||||
; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2]
|
||||
; MIPS32R5EL-NEXT: sb $2, 5($sp)
|
||||
; MIPS32R5EL-NEXT: sb $1, 4($sp)
|
||||
; MIPS32R5EL-NEXT: lhu $2, 4($sp)
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, 16
|
||||
; MIPS32R5EL-NEXT: sb $2, 33($sp)
|
||||
; MIPS32R5EL-NEXT: sb $1, 32($sp)
|
||||
; MIPS32R5EL-NEXT: lhu $2, 32($sp)
|
||||
; MIPS32R5EL-NEXT: move $sp, $fp
|
||||
; MIPS32R5EL-NEXT: lw $fp, 44($sp) # 4-byte Folded Reload
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, 48
|
||||
; MIPS32R5EL-NEXT: jr $ra
|
||||
; MIPS32R5EL-NEXT: nop
|
||||
;
|
||||
|
@ -364,102 +364,82 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
|
|||
;
|
||||
; MIPS32R5EB-LABEL: i8x2_7:
|
||||
; MIPS32R5EB: # %bb.0: # %entry
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, -24
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 24
|
||||
; MIPS32R5EB-NEXT: sw $5, 16($sp)
|
||||
; MIPS32R5EB-NEXT: sw $4, 20($sp)
|
||||
; MIPS32R5EB-NEXT: ldi.b $w0, 0
|
||||
; MIPS32R5EB-NEXT: lbu $1, 17($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 16($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w1, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 18($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 21($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $3, 20($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[0], $3
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[1], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 19($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w1[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 22($sp)
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 23($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EB-NEXT: shf.w $w2, $w2, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w1, $w2, $w1
|
||||
; MIPS32R5EB-NEXT: sw $6, 12($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 13($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 12($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 14($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 15($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EB-NEXT: shf.w $w2, $w2, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EB-NEXT: sw $7, 8($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 9($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 8($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 10($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 11($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EB-NEXT: shf.w $w2, $w2, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EB-NEXT: lbu $1, 41($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 40($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 42($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 43($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EB-NEXT: shf.w $w2, $w2, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EB-NEXT: lbu $1, 45($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 44($sp)
|
||||
; MIPS32R5EB-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 46($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 47($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EB-NEXT: shf.w $w2, $w2, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EB-NEXT: lbu $1, 48($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[0], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 49($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[1], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 50($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[2], $1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 51($sp)
|
||||
; MIPS32R5EB-NEXT: insert.w $w0[3], $1
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, -144
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 144
|
||||
; MIPS32R5EB-NEXT: sw $fp, 140($sp) # 4-byte Folded Spill
|
||||
; MIPS32R5EB-NEXT: .cfi_offset 30, -4
|
||||
; MIPS32R5EB-NEXT: move $fp, $sp
|
||||
; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30
|
||||
; MIPS32R5EB-NEXT: addiu $1, $zero, -16
|
||||
; MIPS32R5EB-NEXT: and $sp, $sp, $1
|
||||
; MIPS32R5EB-NEXT: sw $5, 132($sp)
|
||||
; MIPS32R5EB-NEXT: sw $4, 136($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 133($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 68($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 132($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 64($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 136($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $2, 137($sp)
|
||||
; MIPS32R5EB-NEXT: sw $2, 52($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 48($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w0, 64($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w0, $w0, $w0
|
||||
; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 48($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0
|
||||
; MIPS32R5EB-NEXT: sw $6, 128($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 129($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 84($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 128($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 80($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 80($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: sw $7, 124($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 125($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 100($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 124($sp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 96($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 96($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 161($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 4($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 160($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 0($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 0($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 165($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 20($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 164($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 16($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 16($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: lbu $1, 169($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 36($sp)
|
||||
; MIPS32R5EB-NEXT: lbu $1, 168($fp)
|
||||
; MIPS32R5EB-NEXT: sw $1, 32($sp)
|
||||
; MIPS32R5EB-NEXT: ld.w $w1, 32($sp)
|
||||
; MIPS32R5EB-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w1, $w1, 177
|
||||
; MIPS32R5EB-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177
|
||||
; MIPS32R5EB-NEXT: copy_s.w $1, $w0[1]
|
||||
; MIPS32R5EB-NEXT: copy_s.w $2, $w0[3]
|
||||
; MIPS32R5EB-NEXT: sb $2, 5($sp)
|
||||
; MIPS32R5EB-NEXT: sb $1, 4($sp)
|
||||
; MIPS32R5EB-NEXT: lhu $2, 4($sp)
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, 24
|
||||
; MIPS32R5EB-NEXT: sb $2, 121($sp)
|
||||
; MIPS32R5EB-NEXT: sb $1, 120($sp)
|
||||
; MIPS32R5EB-NEXT: lhu $2, 120($sp)
|
||||
; MIPS32R5EB-NEXT: move $sp, $fp
|
||||
; MIPS32R5EB-NEXT: lw $fp, 140($sp) # 4-byte Folded Reload
|
||||
; MIPS32R5EB-NEXT: addiu $sp, $sp, 144
|
||||
; MIPS32R5EB-NEXT: jr $ra
|
||||
; MIPS32R5EB-NEXT: nop
|
||||
;
|
||||
|
@ -720,94 +700,74 @@ define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x
|
|||
;
|
||||
; MIPS32R5EL-LABEL: i8x2_7:
|
||||
; MIPS32R5EL: # %bb.0: # %entry
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, -24
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 24
|
||||
; MIPS32R5EL-NEXT: sw $5, 16($sp)
|
||||
; MIPS32R5EL-NEXT: ldi.b $w0, 0
|
||||
; MIPS32R5EL-NEXT: sw $4, 20($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 17($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 16($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w1, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 18($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 19($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w1[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 21($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 20($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 22($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 23($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EL-NEXT: addv.d $w1, $w2, $w1
|
||||
; MIPS32R5EL-NEXT: sw $6, 12($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 13($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 12($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 14($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 15($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EL-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EL-NEXT: sw $7, 8($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 9($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 8($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 10($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 11($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EL-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EL-NEXT: lbu $1, 41($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 40($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 42($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 43($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EL-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EL-NEXT: lbu $1, 45($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $2, 44($sp)
|
||||
; MIPS32R5EL-NEXT: move.v $w2, $w0
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[0], $2
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 46($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 47($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w2[3], $1
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w2, $w2, $w2
|
||||
; MIPS32R5EL-NEXT: addv.d $w1, $w1, $w2
|
||||
; MIPS32R5EL-NEXT: lbu $1, 48($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[0], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 49($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[1], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 50($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[2], $1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 51($sp)
|
||||
; MIPS32R5EL-NEXT: insert.w $w0[3], $1
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, -144
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 144
|
||||
; MIPS32R5EL-NEXT: sw $fp, 140($sp) # 4-byte Folded Spill
|
||||
; MIPS32R5EL-NEXT: .cfi_offset 30, -4
|
||||
; MIPS32R5EL-NEXT: move $fp, $sp
|
||||
; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30
|
||||
; MIPS32R5EL-NEXT: addiu $1, $zero, -16
|
||||
; MIPS32R5EL-NEXT: and $sp, $sp, $1
|
||||
; MIPS32R5EL-NEXT: sw $5, 132($sp)
|
||||
; MIPS32R5EL-NEXT: sw $4, 136($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 133($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 68($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 132($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 64($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 137($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 52($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 136($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 48($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w0, 64($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w0, $w0, $w0
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 48($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0
|
||||
; MIPS32R5EL-NEXT: sw $6, 128($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 129($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 84($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 128($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 80($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 80($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: sw $7, 124($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 125($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 100($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 124($sp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 96($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 96($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 161($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 4($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 160($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 0($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 0($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 165($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 20($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 164($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 16($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 16($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: lbu $1, 169($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 36($sp)
|
||||
; MIPS32R5EL-NEXT: lbu $1, 168($fp)
|
||||
; MIPS32R5EL-NEXT: sw $1, 32($sp)
|
||||
; MIPS32R5EL-NEXT: ld.w $w1, 32($sp)
|
||||
; MIPS32R5EL-NEXT: ilvr.w $w1, $w1, $w1
|
||||
; MIPS32R5EL-NEXT: addv.d $w0, $w0, $w1
|
||||
; MIPS32R5EL-NEXT: copy_s.w $1, $w0[0]
|
||||
; MIPS32R5EL-NEXT: copy_s.w $2, $w0[2]
|
||||
; MIPS32R5EL-NEXT: sb $2, 5($sp)
|
||||
; MIPS32R5EL-NEXT: sb $1, 4($sp)
|
||||
; MIPS32R5EL-NEXT: lhu $2, 4($sp)
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, 24
|
||||
; MIPS32R5EL-NEXT: sb $2, 121($sp)
|
||||
; MIPS32R5EL-NEXT: sb $1, 120($sp)
|
||||
; MIPS32R5EL-NEXT: lhu $2, 120($sp)
|
||||
; MIPS32R5EL-NEXT: move $sp, $fp
|
||||
; MIPS32R5EL-NEXT: lw $fp, 140($sp) # 4-byte Folded Reload
|
||||
; MIPS32R5EL-NEXT: addiu $sp, $sp, 144
|
||||
; MIPS32R5EL-NEXT: jr $ra
|
||||
; MIPS32R5EL-NEXT: nop
|
||||
;
|
||||
|
|
|
@ -239,10 +239,7 @@ define <4 x i32> @combine_vec_ashr_trunc_lshr(<4 x i64> %x) {
|
|||
define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
|
||||
; SSE-LABEL: combine_vec_ashr_trunc_ashr:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[0,2]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
|
||||
; SSE-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE-NEXT: psrad $2, %xmm1
|
||||
|
|
|
@ -20,10 +20,7 @@ define <4 x i64> @autogen_SD88863() {
|
|||
;
|
||||
; X64-LABEL: autogen_SD88863:
|
||||
; X64: # %bb.0: # %BB
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
|
||||
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; X64-NEXT: movb $1, %al
|
||||
; X64-NEXT: .p2align 4, 0x90
|
||||
; X64-NEXT: .LBB0_1: # %CF
|
||||
|
@ -31,6 +28,9 @@ define <4 x i64> @autogen_SD88863() {
|
|||
; X64-NEXT: testb %al, %al
|
||||
; X64-NEXT: jne .LBB0_1
|
||||
; X64-NEXT: # %bb.2: # %CF240
|
||||
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
|
||||
; X64-NEXT: retq
|
||||
BB:
|
||||
%I26 = insertelement <4 x i64> undef, i64 undef, i32 2
|
||||
|
|
|
@ -917,8 +917,6 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
|
|||
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; X64-NEXT: xorps %xmm2, %xmm2
|
||||
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; X64-NEXT: retq
|
||||
%ptr = bitcast x86_mmx* %a1 to <2 x float>*
|
||||
|
@ -948,8 +946,6 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
|
|||
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; X64-NEXT: xorps %xmm2, %xmm2
|
||||
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
|
||||
; X64-NEXT: movaps %xmm1, %xmm0
|
||||
; X64-NEXT: retq
|
||||
|
|
|
@ -379,16 +379,12 @@ entry:
|
|||
define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
|
||||
; X86-LABEL: t16:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0]
|
||||
; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X86-NEXT: movdqa %xmm1, %xmm0
|
||||
; X86-NEXT: pslld $16, %xmm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: t16:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0]
|
||||
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
||||
; X64-NEXT: movdqa %xmm1, %xmm0
|
||||
; X64-NEXT: pslld $16, %xmm0
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
|
||||
|
|
|
@ -511,10 +511,9 @@ define <8 x float> @expand14(<4 x float> %a) {
|
|||
;
|
||||
; KNL64-LABEL: expand14:
|
||||
; KNL64: # %bb.0:
|
||||
; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
|
||||
; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
||||
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||
; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
|
||||
; KNL64-NEXT: retq
|
||||
;
|
||||
|
@ -528,10 +527,9 @@ define <8 x float> @expand14(<4 x float> %a) {
|
|||
;
|
||||
; KNL32-LABEL: expand14:
|
||||
; KNL32: # %bb.0:
|
||||
; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
|
||||
; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
|
||||
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
|
||||
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
|
||||
; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
|
||||
; KNL32-NEXT: retl
|
||||
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
|
||||
|
|
|
@ -985,9 +985,8 @@ define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1
|
|||
;
|
||||
; X32-AVX512-LABEL: PR34577:
|
||||
; X32-AVX512: # %bb.0: # %entry
|
||||
; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
|
||||
; X32-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
|
||||
; X32-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; X32-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
|
||||
; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
||||
; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
|
||||
; X32-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
|
||||
|
@ -1006,9 +1005,8 @@ define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1
|
|||
;
|
||||
; X64-AVX512-LABEL: PR34577:
|
||||
; X64-AVX512: # %bb.0: # %entry
|
||||
; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <1,u,u,u,2,u,5,0>
|
||||
; X64-AVX512-NEXT: vpermps %ymm0, %ymm2, %ymm0
|
||||
; X64-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; X64-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
|
||||
; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
|
||||
; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
|
||||
; X64-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
|
||||
|
|
|
@ -237,8 +237,6 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
|
|||
; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE1-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE1-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
|
||||
; SSE1-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE1-NEXT: retq
|
||||
|
@ -258,8 +256,6 @@ define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
|
|||
; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; SSE1-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE1-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE1-NEXT: retq
|
||||
%a = load <2 x float>, <2 x float>* %ptr
|
||||
|
|
|
@ -710,7 +710,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
|
|||
; AVX512F-NEXT: kmovw %edi, %k1
|
||||
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,6,1,0,3,7,7,1]
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,18446744073709551615,18446744073709551615,0,0]
|
||||
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [18446744073709551615,18446744073709551615,0,0,0,0,0,0]
|
||||
; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
|
||||
; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
|
|
|
@ -54,61 +54,19 @@ entry:
|
|||
}
|
||||
|
||||
define <8 x i32> @trunc8i64_8i32_ashr(<8 x i64> %a) {
|
||||
; SSE2-LABEL: trunc8i64_8i32_ashr:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
|
||||
; SSE2-NEXT: psrad $31, %xmm3
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
|
||||
; SSE2-NEXT: psrad $31, %xmm1
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[0,2]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm4[0,2]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: trunc8i64_8i32_ashr:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3]
|
||||
; SSSE3-NEXT: psrad $31, %xmm3
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
|
||||
; SSSE3-NEXT: psrad $31, %xmm1
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[0,2]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm4[0,2]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm1
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: trunc8i64_8i32_ashr:
|
||||
; SSE41: # %bb.0: # %entry
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
|
||||
; SSE41-NEXT: psrad $31, %xmm3
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
|
||||
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
|
||||
; SSE41-NEXT: psrad $31, %xmm1
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[0,2]
|
||||
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[0,2]
|
||||
; SSE41-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: trunc8i64_8i32_ashr:
|
||||
; SSE: # %bb.0: # %entry
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
|
||||
; SSE-NEXT: movaps %xmm2, %xmm1
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: trunc8i64_8i32_ashr:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||
; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[0,2]
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[0,2]
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
|
||||
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue