(a) Improve the extract element code: there's no need to do gymnastics with
    rotates into the preferred slot if a shuffle will do the same thing.
(b) Rename a couple of SPUISD pseudo-instructions for readability and better
    semantic correspondence.
(c) Fix i64 sign/any/zero extension lowering.

llvm-svn: 59965
This commit is contained in:
Scott Michel 2008-11-24 17:11:17 +00:00
parent f3bfcf9748
commit efc8c7a292
7 changed files with 84 additions and 112 deletions

View File

@ -261,7 +261,7 @@ namespace {
void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) { void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
if (MI->getOperand(OpNo).isImm()) { if (MI->getOperand(OpNo).isImm()) {
int value = (int) MI->getOperand(OpNo).getImm(); int value = (int) MI->getOperand(OpNo).getImm();
assert((value >= 0 && value < 32) assert((value >= 0 && value <= 32)
&& "Invalid negated immediate rotate 7-bit argument"); && "Invalid negated immediate rotate 7-bit argument");
O << -value; O << -value;
} else { } else {

View File

@ -8,6 +8,7 @@ Department in The Aerospace Corporation:
- Mark Thomas (floating point instructions) - Mark Thomas (floating point instructions)
- Michael AuYeung (intrinsics) - Michael AuYeung (intrinsics)
- Chandler Carruth (LLVM expertise) - Chandler Carruth (LLVM expertise)
- Nehal Desai (debugging, RoadRunner SPU expertise)
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@ -33,6 +34,11 @@ to add 'spu' to configure's --enable-targets option, e.g.:
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
The unofficially official status page (because it's not easy to get an
officially blessed external web page from either IBM Austin or Aerospace):
http://sites.google.com/site/llvmcellspu/
TODO: TODO:
* Finish branch instructions, branch prediction * Finish branch instructions, branch prediction

View File

@ -425,9 +425,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
= "SPUISD::EXTRACT_ELT0_CHAINED"; = "SPUISD::VEC2PREFSLOT_CHAINED";
node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT"; node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT"; node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT"; node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
@ -447,8 +447,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
"SPUISD::ROTQUAD_RZ_BYTES"; "SPUISD::ROTQUAD_RZ_BYTES";
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
"SPUISD::ROTQUAD_RZ_BITS"; "SPUISD::ROTQUAD_RZ_BITS";
node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
"SPUISD::ROTBYTES_RIGHT_S";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] = node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
"SPUISD::ROTBYTES_LEFT_CHAINED"; "SPUISD::ROTBYTES_LEFT_CHAINED";
@ -647,7 +645,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
Ops[0] = the_chain; Ops[0] = the_chain;
Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result); Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other); scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
the_chain = result.getValue(1); the_chain = result.getValue(1);
} else { } else {
// Handle the sign and zero-extending loads for i1 and i8: // Handle the sign and zero-extending loads for i1 and i8:
@ -889,7 +887,7 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) {
if (VT == MVT::i64) { if (VT == MVT::i64) {
SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64); SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
} else { } else {
cerr << "LowerConstant: unhandled constant type " cerr << "LowerConstant: unhandled constant type "
@ -1603,7 +1601,7 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
case MVT::v2f64: { case MVT::v2f64: {
uint64_t f64val = SplatBits; uint64_t f64val = SplatBits;
assert(SplatSize == 8 assert(SplatSize == 8
&& "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size."); && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants // NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(f64val, MVT::i64); SDValue T = DAG.getConstant(f64val, MVT::i64);
return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
@ -1656,8 +1654,8 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// specialized masks to replace any and all 0's, 0xff's and 0x80's. // specialized masks to replace any and all 0's, 0xff's and 0x80's.
// Detect if the upper or lower half is a special shuffle mask pattern: // Detect if the upper or lower half is a special shuffle mask pattern:
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
// Create lower vector if not a special pattern // Create lower vector if not a special pattern
if (!lower_special) { if (!lower_special) {
@ -2077,7 +2075,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
// i32 and i64: Element 0 is the preferred slot // i32 and i64: Element 0 is the preferred slot
return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N); return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
} }
// Need to generate shuffle mask and extract: // Need to generate shuffle mask and extract:
@ -2140,7 +2138,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
&ShufMask[0], &ShufMask[0],
sizeof(ShufMask) / sizeof(ShufMask[0])); sizeof(ShufMask) / sizeof(ShufMask[0]));
retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT, retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(SPUISD::SHUFB, N.getValueType(), DAG.getNode(SPUISD::SHUFB, N.getValueType(),
N, N, ShufMaskVec)); N, N, ShufMaskVec));
} else { } else {
@ -2158,60 +2156,20 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Scale the index to a bit/byte shift quantity // Scale the index to a bit/byte shift quantity
APInt scaleFactor = APInt scaleFactor =
APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
unsigned scaleShift = scaleFactor.logBase2();
SDValue vecShift; SDValue vecShift;
switch (VT.getSimpleVT()) {
default:
cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
abort();
/*NOTREACHED*/
case MVT::i8: {
// Don't need to scale, but we do need to correct for where bytes go in
// slot 0:
SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
Elt, DAG.getConstant(3, MVT::i32));
SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
DAG.getConstant(16, MVT::i32));
SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32, if (scaleShift > 0) {
prefSlot, DAG.getConstant(0, MVT::i32), // Scale the shift factor:
prefSlot, // trueval
corrected, // falseval
DAG.getCondCode(ISD::SETGT));
vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
break;
}
case MVT::i16: {
// Scale the index to bytes, subtract for preferred slot:
Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt, Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
DAG.getConstant(scaleFactor.logBase2(), MVT::i32)); DAG.getConstant(scaleShift, MVT::i32));
SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
Elt, DAG.getConstant(2, MVT::i32));
SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
DAG.getConstant(16, MVT::i32));
SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
prefSlot, DAG.getConstant(0, MVT::i32),
prefSlot, // trueval
corrected, // falseval
DAG.getCondCode(ISD::SETGT));
vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
break;
}
case MVT::i32:
case MVT::f32:
case MVT::i64:
case MVT::f64:
// Simple left shift to slot 0
Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
break;
} }
// Replicate slot 0 across the entire vector (for consistency with the vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
// notion of a unified register set)
// Replicate the bytes starting at byte 0 across the entire vector (for
// consistency with the notion of a unified register set)
SDValue replicate; SDValue replicate;
switch (VT.getSimpleVT()) { switch (VT.getSimpleVT()) {
@ -2220,13 +2178,13 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
abort(); abort();
/*NOTREACHED*/ /*NOTREACHED*/
case MVT::i8: { case MVT::i8: {
SDValue factor = DAG.getConstant(0x03030303, MVT::i32); SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
factor, factor); factor, factor);
break; break;
} }
case MVT::i16: { case MVT::i16: {
SDValue factor = DAG.getConstant(0x02030203, MVT::i32); SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
factor, factor); factor, factor);
break; break;
@ -2248,7 +2206,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
} }
} }
retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT, retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate)); DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
} }
@ -2400,19 +2358,34 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
assert(Op0VT == MVT::i32 assert(Op0VT == MVT::i32
&& "CellSPU: Zero/sign extending something other than i32"); && "CellSPU: Zero/sign extending something other than i32");
DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
unsigned NewOpc = (Opc == ISD::SIGN_EXTEND DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
? SPUISD::ROTBYTES_RIGHT_S
: SPUISD::ROTQUAD_RZ_BYTES);
SDValue PromoteScalar = SDValue PromoteScalar =
DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0); DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
SDValue RotQuad =
DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
PromoteScalar, DAG.getConstant(4, MVT::i32));
return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, if (Opc != ISD::SIGN_EXTEND) {
DAG.getNode(ISD::BIT_CONVERT, VecVT, return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(NewOpc, Op0VecVT, DAG.getNode(ISD::BIT_CONVERT, VecVT, RotQuad));
PromoteScalar, } else {
DAG.getConstant(4, MVT::i32)))); // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
// right and propagate the sign bit) instruction.
SDValue SignQuad =
DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
PromoteScalar, DAG.getConstant(32, MVT::i32));
SDValue SelMask =
DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
DAG.getConstant(0xf0f0, MVT::i16));
SDValue CombineQuad =
DAG.getNode(SPUISD::SELB, Op0VecVT,
SignQuad, RotQuad, SelMask);
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
}
} }
case ISD::ADD: { case ISD::ADD: {
@ -2439,7 +2412,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size())); &ShufBytes[0], ShufBytes.size()));
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64, return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64, DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedCarry)); Op0, Op1, ShiftedCarry));
} }
@ -2468,7 +2441,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size())); &ShufBytes[0], ShufBytes.size()));
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64, return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedBorrow)); Op0, Op1, ShiftedBorrow));
} }
@ -2492,7 +2465,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
ShiftAmt, ShiftAmt,
DAG.getConstant(7, ShiftAmtVT)); DAG.getConstant(7, ShiftAmtVT));
return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
MaskLower, ShiftAmtBytes), MaskLower, ShiftAmtBytes),
@ -2532,7 +2505,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
} }
SDValue UpperHalfSign = SDValue UpperHalfSign =
DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32, DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
Op0, DAG.getConstant(31, MVT::i32)))); Op0, DAG.getConstant(31, MVT::i32))));
@ -2551,7 +2524,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
RotateLeftBytes, ShiftAmt); RotateLeftBytes, ShiftAmt);
return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64, return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
RotateLeftBits); RotateLeftBits);
} }
} }
@ -2968,7 +2941,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case ISD::SIGN_EXTEND: case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: { case ISD::ANY_EXTEND: {
if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 && if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
N->getValueType(0) == Op0.getValueType()) { N->getValueType(0) == Op0.getValueType()) {
// (any_extend (SPUextract_elt0 <arg>)) -> // (any_extend (SPUextract_elt0 <arg>)) ->
// (SPUextract_elt0 <arg>) // (SPUextract_elt0 <arg>)
@ -3031,7 +3004,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
// <arg> // <arg>
// but only if the SPUpromote_scalar and <arg> types match. // but only if the SPUpromote_scalar and <arg> types match.
SDValue Op00 = Op0.getOperand(0); SDValue Op00 = Op0.getOperand(0);
if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) { if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
SDValue Op000 = Op00.getOperand(0); SDValue Op000 = Op00.getOperand(0);
if (Op000.getValueType() == N->getValueType(0)) { if (Op000.getValueType() == N->getValueType(0)) {
Result = Op000; Result = Op000;
@ -3039,7 +3012,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
} }
break; break;
} }
case SPUISD::EXTRACT_ELT0: { case SPUISD::VEC2PREFSLOT: {
// (SPUpromote_scalar (SPUextract_elt0 <arg>)) -> // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
// <arg> // <arg>
Result = Op0.getOperand(0); Result = Op0.getOperand(0);
@ -3146,8 +3119,8 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
} }
case SPUISD::LDRESULT: case SPUISD::LDRESULT:
case SPUISD::EXTRACT_ELT0: case SPUISD::VEC2PREFSLOT:
case SPUISD::EXTRACT_ELT0_CHAINED: { case SPUISD::VEC2PREFSLOT_CHAINED: {
MVT OpVT = Op.getValueType(); MVT OpVT = Op.getValueType();
unsigned OpVTBits = OpVT.getSizeInBits(); unsigned OpVTBits = OpVT.getSizeInBits();
uint64_t InMask = OpVT.getIntegerVTBitMask(); uint64_t InMask = OpVT.getIntegerVTBitMask();
@ -3174,7 +3147,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case SPUISD::VEC_ROTR: case SPUISD::VEC_ROTR:
case SPUISD::ROTQUAD_RZ_BYTES: case SPUISD::ROTQUAD_RZ_BYTES:
case SPUISD::ROTQUAD_RZ_BITS: case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_RIGHT_S:
case SPUISD::ROTBYTES_LEFT: case SPUISD::ROTBYTES_LEFT:
case SPUISD::ROTBYTES_LEFT_CHAINED: case SPUISD::ROTBYTES_LEFT_CHAINED:
case SPUISD::SELECT_MASK: case SPUISD::SELECT_MASK:

View File

@ -40,8 +40,8 @@ namespace llvm {
SHUFFLE_MASK, ///< Shuffle mask SHUFFLE_MASK, ///< Shuffle mask
CNTB, ///< Count leading ones in bytes CNTB, ///< Count leading ones in bytes
PROMOTE_SCALAR, ///< Promote scalar->vector PROMOTE_SCALAR, ///< Promote scalar->vector
EXTRACT_ELT0, ///< Extract element 0 VEC2PREFSLOT, ///< Extract element 0
EXTRACT_ELT0_CHAINED, ///< Extract element 0, with chain VEC2PREFSLOT_CHAINED, ///< Extract element 0, with chain
EXTRACT_I1_ZEXT, ///< Extract element 0 as i1, zero extend EXTRACT_I1_ZEXT, ///< Extract element 0 as i1, zero extend
EXTRACT_I1_SEXT, ///< Extract element 0 as i1, sign extend EXTRACT_I1_SEXT, ///< Extract element 0 as i1, sign extend
EXTRACT_I8_ZEXT, ///< Extract element 0 as i8, zero extend EXTRACT_I8_ZEXT, ///< Extract element 0 as i8, zero extend
@ -59,7 +59,6 @@ namespace llvm {
VEC_ROTR, ///< Vector rotate right VEC_ROTR, ///< Vector rotate right
ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
ROTBYTES_RIGHT_S, ///< Vector rotate right, by bytes, sign fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI) ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count

View File

@ -1286,40 +1286,40 @@ def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)),
// ORi*_v*: Used to extract vector element 0 (the preferred slot) // ORi*_v*: Used to extract vector element 0 (the preferred slot)
def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v16i8 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v16i8 VECREG:$rA)),
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0 (v8i16 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>; (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v8i16 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v8i16 VECREG:$rA)),
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>; (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0 (v4i32 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>; (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v4i32 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v4i32 VECREG:$rA)),
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>; (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0 (v2i64 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>; (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v2i64 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v2i64 VECREG:$rA)),
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>; (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0 (v4f32 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>; (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v4f32 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v4f32 VECREG:$rA)),
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>; (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0 (v2f64 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>; (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
def : Pat<(SPUextract_elt0_chained (v2f64 VECREG:$rA)), def : Pat<(SPUvec2prefslot_chained (v2f64 VECREG:$rA)),
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>; (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
// ORC: Bitwise "or" with complement (c = a | ~b) // ORC: Bitwise "or" with complement (c = a | ~b)

View File

@ -121,9 +121,6 @@ def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES",
def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS", def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
SPUvecshift_type, []>; SPUvecshift_type, []>;
def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S",
SPUvecshift_type, []>;
// Vector rotate left, bits shifted out of the left are rotated in on the right // Vector rotate left, bits shifted out of the left are rotated in on the right
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>; SPUvecshift_type, []>;
@ -155,9 +152,9 @@ def SDTpromote_scalar: SDTypeProfile<1, 1, []>;
def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>; def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>;
def SPU_vec_demote : SDTypeProfile<1, 1, []>; def SPU_vec_demote : SDTypeProfile<1, 1, []>;
def SPUextract_elt0: SDNode<"SPUISD::EXTRACT_ELT0", SPU_vec_demote, []>; def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>; def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>;
def SPUextract_elt0_chained: SDNode<"SPUISD::EXTRACT_ELT0_CHAINED", def SPUvec2prefslot_chained: SDNode<"SPUISD::VEC2PREFSLOT_CHAINED",
SPU_vec_demote_chained, [SDNPHasChain]>; SPU_vec_demote_chained, [SDNPHasChain]>;
def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>; def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>;
def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>; def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>;

View File

@ -1,12 +1,10 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep shufb %t1.s | count 39 ; RUN: grep shufb %t1.s | count 39
; RUN: grep ilhu %t1.s | count 31 ; RUN: grep ilhu %t1.s | count 27
; RUN: grep iohl %t1.s | count 31 ; RUN: grep iohl %t1.s | count 27
; RUN: grep lqa %t1.s | count 10 ; RUN: grep lqa %t1.s | count 10
; RUN: grep shlqbyi %t1.s | count 8 ; RUN: grep shlqbyi %t1.s | count 12
; RUN: grep selb %t1.s | count 4 ; RUN: grep 515 %t1.s | count 1
; RUN: grep cgti %t1.s | count 4
; RUN: grep 515 %t1.s | count 5
; RUN: grep 1029 %t1.s | count 2 ; RUN: grep 1029 %t1.s | count 2
; RUN: grep 1543 %t1.s | count 2 ; RUN: grep 1543 %t1.s | count 2
; RUN: grep 2057 %t1.s | count 2 ; RUN: grep 2057 %t1.s | count 2