CellSPU:
(a) Improve the extract element code: there's no need to do gymnastics with
    rotates into the preferred slot if a shuffle will do the same thing.
(b) Rename a couple of SPUISD pseudo-instructions for readability and better
    semantic correspondence.
(c) Fix i64 sign/any/zero extension lowering.

llvm-svn: 59965
parent f3bfcf9748
commit efc8c7a292
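For readers unfamiliar with the SPU shuffle machinery touched by (a): the sketch
below is a minimal, standalone C++ illustration, not code from this commit. It
mimics only the plain byte-select behaviour of the SPU shufb instruction
(special 0x80/0xC0/0xE0 control encodings are omitted), and the helper name,
sample values, and driver are made up, assuming a big-endian quadword with the
requested element already shifted to byte 0 by SHLQUAD_L_BYTES. It shows why a
replicate control word such as 0x00010001 splats the element into every slot,
including the preferred one, so no rotate/select correction is needed.

// Hypothetical illustration of the shufb-based replicate used by the new
// LowerEXTRACT_VECTOR_ELT path; not SPU or LLVM code.
#include <cstdint>
#include <cstdio>

// Select bytes from the 32-byte concatenation a:b according to ctl
// (plain 0x00-0x1f control bytes only).
static void shufb(const uint8_t a[16], const uint8_t b[16],
                  const uint8_t ctl[16], uint8_t out[16]) {
  for (int i = 0; i < 16; ++i) {
    uint8_t c = ctl[i] & 0x1f;
    out[i] = (c < 16) ? a[c] : b[c - 16];
  }
}

int main() {
  // A v8i16 quadword; assume the requested halfword was already shifted to
  // bytes 0-1 (here 0xDEAD).
  uint8_t vec[16] = {0xDE, 0xAD, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // Replicate mask built from the commit's 0x00010001 constant: every result
  // halfword is taken from bytes 0-1 of the input.
  uint8_t replicate[16];
  for (int i = 0; i < 16; ++i)
    replicate[i] = (i & 1) ? 0x01 : 0x00;

  uint8_t out[16];
  shufb(vec, vec, replicate, out);
  for (int i = 0; i < 16; ++i)
    printf("%02x ", out[i]);   // prints "de ad" repeated eight times
  printf("\n");
  return 0;
}

Because the element ends up in every slot, it is also in the preferred slot,
which is what lets the old per-type rotate/SELECT_CC correction code be dropped.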
@@ -261,7 +261,7 @@ namespace {
     void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
       if (MI->getOperand(OpNo).isImm()) {
         int value = (int) MI->getOperand(OpNo).getImm();
-        assert((value >= 0 && value < 32)
+        assert((value >= 0 && value <= 32)
                && "Invalid negated immediate rotate 7-bit argument");
         O << -value;
       } else {
@@ -8,6 +8,7 @@ Department in The Aerospace Corporation:
 - Mark Thomas (floating point instructions)
 - Michael AuYeung (intrinsics)
 - Chandler Carruth (LLVM expertise)
+- Nehal Desai (debugging, RoadRunner SPU expertise)
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -33,6 +34,11 @@ to add 'spu' to configure's --enable-targets option, e.g.:
 
 ---------------------------------------------------------------------------
 
+The unofficially official status page (because it's not easy to get an
+officially blessed external web page from either IBM Austin or Aerosapce):
+
+http://sites.google.com/site/llvmcellspu/
+
 TODO:
 * Finish branch instructions, branch prediction
 
@@ -425,9 +425,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
   node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
   node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
-  node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
-  node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
-    = "SPUISD::EXTRACT_ELT0_CHAINED";
+  node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+  node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
+    = "SPUISD::VEC2PREFSLOT_CHAINED";
   node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
   node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
   node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
@@ -447,8 +447,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
     "SPUISD::ROTQUAD_RZ_BYTES";
   node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
     "SPUISD::ROTQUAD_RZ_BITS";
-  node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
-    "SPUISD::ROTBYTES_RIGHT_S";
   node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
   node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
     "SPUISD::ROTBYTES_LEFT_CHAINED";
@@ -647,7 +645,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
     Ops[0] = the_chain;
     Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
     scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
-    result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
+    result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
     the_chain = result.getValue(1);
   } else {
     // Handle the sign and zero-extending loads for i1 and i8:
@@ -889,7 +887,7 @@ LowerConstant(SDValue Op, SelectionDAG &DAG) {
 
   if (VT == MVT::i64) {
     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
   } else {
     cerr << "LowerConstant: unhandled constant type "
@@ -1603,7 +1601,7 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   case MVT::v2f64: {
     uint64_t f64val = SplatBits;
     assert(SplatSize == 8
-           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
+           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
     SDValue T = DAG.getConstant(f64val, MVT::i64);
     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
@@ -1656,8 +1654,8 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   // specialized masks to replace any and all 0's, 0xff's and 0x80's.
 
   // Detect if the upper or lower half is a special shuffle mask pattern:
-  upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
-  lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+  upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
+  lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
 
   // Create lower vector if not a special pattern
   if (!lower_special) {
@@ -2077,7 +2075,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
 
   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
     // i32 and i64: Element 0 is the preferred slot
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
   }
 
   // Need to generate shuffle mask and extract:
@@ -2140,7 +2138,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
                                  &ShufMask[0],
                                  sizeof(ShufMask) / sizeof(ShufMask[0]));
 
-    retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                      N, N, ShufMaskVec));
   } else {
@@ -2158,60 +2156,20 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
 
     // Scale the index to a bit/byte shift quantity
     APInt scaleFactor =
       APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+    unsigned scaleShift = scaleFactor.logBase2();
     SDValue vecShift;
 
-    switch (VT.getSimpleVT()) {
-    default:
-      cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
-      abort();
-      /*NOTREACHED*/
-    case MVT::i8: {
-      // Don't need to scale, but we do need to correct for where bytes go in
-      // slot 0:
-      SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
-                                     Elt, DAG.getConstant(3, MVT::i32));
-      SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
-                                      DAG.getConstant(16, MVT::i32));
-
-      SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
-                                     prefSlot, DAG.getConstant(0, MVT::i32),
-                                     prefSlot,  // trueval
-                                     corrected, // falseval
-                                     DAG.getCondCode(ISD::SETGT));
-      vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
-      break;
-    }
-    case MVT::i16: {
-      // Scale the index to bytes, subtract for preferred slot:
+    if (scaleShift > 0) {
+      // Scale the shift factor:
       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
-                        DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
-      SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
-                                     Elt, DAG.getConstant(2, MVT::i32));
-      SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
-                                      DAG.getConstant(16, MVT::i32));
-
-      SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
-                                     prefSlot, DAG.getConstant(0, MVT::i32),
-                                     prefSlot,  // trueval
-                                     corrected, // falseval
-                                     DAG.getCondCode(ISD::SETGT));
-      vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
-      break;
-    }
-    case MVT::i32:
-    case MVT::f32:
-    case MVT::i64:
-    case MVT::f64:
-      // Simple left shift to slot 0
-      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
-                        DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
-      vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
-      break;
+                        DAG.getConstant(scaleShift, MVT::i32));
     }
 
-    // Replicate slot 0 across the entire vector (for consistency with the
-    // notion of a unified register set)
+    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
+
+    // Replicate the bytes starting at byte 0 across the entire vector (for
+    // consistency with the notion of a unified register set)
     SDValue replicate;
 
     switch (VT.getSimpleVT()) {
@@ -2220,13 +2178,13 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
       abort();
       /*NOTREACHED*/
     case MVT::i8: {
-      SDValue factor = DAG.getConstant(0x03030303, MVT::i32);
+      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                               factor, factor);
       break;
     }
     case MVT::i16: {
-      SDValue factor = DAG.getConstant(0x02030203, MVT::i32);
+      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                               factor, factor);
       break;
@@ -2248,7 +2206,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
       }
     }
 
-    retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
        DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
   }
 
@@ -2400,19 +2358,34 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
 
     assert(Op0VT == MVT::i32
            && "CellSPU: Zero/sign extending something other than i32");
-    DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
 
-    unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
-                      ? SPUISD::ROTBYTES_RIGHT_S
-                      : SPUISD::ROTQUAD_RZ_BYTES);
+    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
+
     SDValue PromoteScalar =
       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+    SDValue RotQuad =
+      DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
+                  PromoteScalar, DAG.getConstant(4, MVT::i32));
 
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
-                       DAG.getNode(ISD::BIT_CONVERT, VecVT,
-                                   DAG.getNode(NewOpc, Op0VecVT,
-                                               PromoteScalar,
-                                               DAG.getConstant(4, MVT::i32))));
+    if (Opc != ISD::SIGN_EXTEND) {
+      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+                         DAG.getNode(ISD::BIT_CONVERT, VecVT, RotQuad));
+    } else {
+      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
+      // right and propagate the sign bit) instruction.
+      SDValue SignQuad =
+        DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
+                    PromoteScalar, DAG.getConstant(32, MVT::i32));
+      SDValue SelMask =
+        DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
+                    DAG.getConstant(0xf0f0, MVT::i16));
+      SDValue CombineQuad =
+        DAG.getNode(SPUISD::SELB, Op0VecVT,
+                    SignQuad, RotQuad, SelMask);
+
+      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
+    }
   }
 
   case ISD::ADD: {
@@ -2439,7 +2412,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 &ShufBytes[0], ShufBytes.size()));
 
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                    Op0, Op1, ShiftedCarry));
   }
@@ -2468,7 +2441,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 &ShufBytes[0], ShufBytes.size()));
 
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                    Op0, Op1, ShiftedBorrow));
   }
@@ -2492,7 +2465,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
                                       ShiftAmt,
                                       DAG.getConstant(7, ShiftAmtVT));
 
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                                MaskLower, ShiftAmtBytes),
@@ -2532,7 +2505,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
     }
 
     SDValue UpperHalfSign =
-      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
+      DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                           Op0, DAG.getConstant(31, MVT::i32))));
@@ -2551,7 +2524,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                   RotateLeftBytes, ShiftAmt);
 
-    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                        RotateLeftBits);
   }
   }
@@ -2968,7 +2941,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
     case ISD::SIGN_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::ANY_EXTEND: {
-      if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
+      if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
           N->getValueType(0) == Op0.getValueType()) {
         // (any_extend (SPUextract_elt0 <arg>)) ->
         // (SPUextract_elt0 <arg>)
@@ -3031,7 +3004,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       // <arg>
       // but only if the SPUpromote_scalar and <arg> types match.
       SDValue Op00 = Op0.getOperand(0);
-      if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
+      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == N->getValueType(0)) {
          Result = Op000;
@@ -3039,7 +3012,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       }
       break;
     }
-    case SPUISD::EXTRACT_ELT0: {
+    case SPUISD::VEC2PREFSLOT: {
       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
       // <arg>
       Result = Op0.getOperand(0);
@@ -3146,8 +3119,8 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   }
 
   case SPUISD::LDRESULT:
-  case SPUISD::EXTRACT_ELT0:
-  case SPUISD::EXTRACT_ELT0_CHAINED: {
+  case SPUISD::VEC2PREFSLOT:
+  case SPUISD::VEC2PREFSLOT_CHAINED: {
     MVT OpVT = Op.getValueType();
     unsigned OpVTBits = OpVT.getSizeInBits();
     uint64_t InMask = OpVT.getIntegerVTBitMask();
@@ -3174,7 +3147,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   case SPUISD::VEC_ROTR:
   case SPUISD::ROTQUAD_RZ_BYTES:
   case SPUISD::ROTQUAD_RZ_BITS:
-  case SPUISD::ROTBYTES_RIGHT_S:
   case SPUISD::ROTBYTES_LEFT:
   case SPUISD::ROTBYTES_LEFT_CHAINED:
   case SPUISD::SELECT_MASK:
@@ -40,8 +40,8 @@ namespace llvm {
       SHUFFLE_MASK,         ///< Shuffle mask
      CNTB,                 ///< Count leading ones in bytes
      PROMOTE_SCALAR,       ///< Promote scalar->vector
-     EXTRACT_ELT0,         ///< Extract element 0
-     EXTRACT_ELT0_CHAINED, ///< Extract element 0, with chain
+     VEC2PREFSLOT,         ///< Extract element 0
+     VEC2PREFSLOT_CHAINED, ///< Extract element 0, with chain
      EXTRACT_I1_ZEXT,      ///< Extract element 0 as i1, zero extend
      EXTRACT_I1_SEXT,      ///< Extract element 0 as i1, sign extend
      EXTRACT_I8_ZEXT,      ///< Extract element 0 as i8, zero extend
@@ -59,7 +59,6 @@ namespace llvm {
      VEC_ROTR,             ///< Vector rotate right
      ROTQUAD_RZ_BYTES,     ///< Rotate quad right, by bytes, zero fill
      ROTQUAD_RZ_BITS,      ///< Rotate quad right, by bits, zero fill
-     ROTBYTES_RIGHT_S,     ///< Vector rotate right, by bytes, sign fill
      ROTBYTES_LEFT,        ///< Rotate bytes (loads -> ROTQBYI)
      ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
      ROTBYTES_LEFT_BITS,   ///< Rotate bytes left by bit shift count
@@ -1286,40 +1286,40 @@ def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)),
 
 // ORi*_v*: Used to extract vector element 0 (the preferred slot)
 
-def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
           (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v16i8 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v16i8 VECREG:$rA)),
           (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0 (v8i16 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
           (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v8i16 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v8i16 VECREG:$rA)),
          (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0 (v4i32 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
          (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v4i32 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v4i32 VECREG:$rA)),
          (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0 (v2i64 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
          (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v2i64 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v2i64 VECREG:$rA)),
          (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0 (v4f32 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
          (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v4f32 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v4f32 VECREG:$rA)),
          (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0 (v2f64 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
          (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
 
-def : Pat<(SPUextract_elt0_chained (v2f64 VECREG:$rA)),
+def : Pat<(SPUvec2prefslot_chained (v2f64 VECREG:$rA)),
          (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
 
 // ORC: Bitwise "or" with complement (c = a | ~b)
@@ -121,9 +121,6 @@ def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES",
 def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
                                SPUvecshift_type, []>;
 
-def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S",
-                                    SPUvecshift_type, []>;
-
 // Vector rotate left, bits shifted out of the left are rotated in on the right
 def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
                              SPUvecshift_type, []>;
@@ -155,9 +152,9 @@ def SDTpromote_scalar: SDTypeProfile<1, 1, []>;
 def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>;
 
 def SPU_vec_demote : SDTypeProfile<1, 1, []>;
-def SPUextract_elt0: SDNode<"SPUISD::EXTRACT_ELT0", SPU_vec_demote, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
 def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>;
-def SPUextract_elt0_chained: SDNode<"SPUISD::EXTRACT_ELT0_CHAINED",
+def SPUvec2prefslot_chained: SDNode<"SPUISD::VEC2PREFSLOT_CHAINED",
                                     SPU_vec_demote_chained, [SDNPHasChain]>;
 def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>;
 def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>;
@@ -1,12 +1,10 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
 ; RUN: grep shufb %t1.s | count 39
-; RUN: grep ilhu %t1.s | count 31
-; RUN: grep iohl %t1.s | count 31
+; RUN: grep ilhu %t1.s | count 27
+; RUN: grep iohl %t1.s | count 27
 ; RUN: grep lqa %t1.s | count 10
-; RUN: grep shlqbyi %t1.s | count 8
-; RUN: grep selb %t1.s | count 4
-; RUN: grep cgti %t1.s | count 4
-; RUN: grep 515 %t1.s | count 5
+; RUN: grep shlqbyi %t1.s | count 12
+; RUN: grep 515 %t1.s | count 1
 ; RUN: grep 1029 %t1.s | count 2
 ; RUN: grep 1543 %t1.s | count 2
 ; RUN: grep 2057 %t1.s | count 2