- Ensure that (operation) legalization emits proper FDIV libcall when needed.
- Fix various bugs encountered during llvm-spu-gcc build, along with various
  cleanups.
- Start supporting double precision comparisons for remaining libgcc2 build.
  Discovered interesting DAGCombiner feature, which is currently solved via
  custom lowering (64-bit constants are not legal on CellSPU, but DAGCombiner
  insists on inserting one anyway.)
- Update README.

llvm-svn: 62664
This commit is contained in:
Scott Michel 2009-01-21 04:58:48 +00:00
parent a70798cc9a
commit ed7d79fce4
10 changed files with 320 additions and 133 deletions

View File

@ -3294,6 +3294,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
LC = GetFPLibCall(VT, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
RTLIB::POW_PPCF128);
break;
case ISD::FDIV:
LC = GetFPLibCall(VT, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80,
RTLIB::DIV_PPCF128);
break;
default: break;
}
if (LC != RTLIB::UNKNOWN_LIBCALL) {

View File

@ -8,7 +8,7 @@ Department in The Aerospace Corporation:
- Mark Thomas (floating point instructions)
- Michael AuYeung (intrinsics)
- Chandler Carruth (LLVM expertise)
- Nehal Desai (debugging, RoadRunner SPU expertise)
- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@ -36,7 +36,7 @@ to add 'spu' to configure's --enable-targets option, e.g.:
TODO:
* Create a machine pass for performing dual-pipeline scheduling specifically
for CellSPU, handle inserting branch prediction instructions.
for CellSPU, and insert branch prediction instructions as needed.
* i32 instructions:
@ -48,20 +48,43 @@ TODO:
* sign and zero extension: done
* addition: done
* subtraction: needed
* multiplication: work-in-progress
* multiplication: done
* i128 support:
* zero extension: done
* zero extension, any extension: done
* sign extension: needed
* arithmetic operators (add, sub, mul, div): needed
* logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
* Double floating point support
* or: done
This was started. "What's missing?" to be filled in.
* f64 support
* Comparison operators:
SETOEQ unimplemented
SETOGT unimplemented
SETOGE unimplemented
SETOLT unimplemented
SETOLE unimplemented
SETONE unimplemented
SETO done (lowered)
SETUO done (lowered)
SETUEQ unimplemented
SETUGT unimplemented
SETUGE unimplemented
SETULT unimplemented
SETULE unimplemented
SETUNE unimplemented
* LLVM vector support
* VSETCC needs to be implemented. It's pretty straightforward to code, but
needs implementation.
* Intrinsics
Lots of progress. "What's missing/incomplete?" to be filled in.
* spu.h intrinsics added but not tested. Need to have an operational
llvm-spu-gcc in order to write a unit test harness.
===-------------------------------------------------------------------------===

View File

@ -2,7 +2,6 @@
//
// Cell SPU 128-bit operations
//
// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
// zext 32->128: Zero extend 32-bit to 128-bit
@ -20,3 +19,23 @@ def : Pat<(i128 (zext R16C:$rSrc)),
// zext 8->128: Zero extend 8-bit to 128-bit
// The 8-bit source is masked with 0xff so that all eight source bits are
// kept (the 16-bit form masks with the full-width 0xffff in the same way);
// a 0xf mask would discard bits 4-7 of the source. The masked 32-bit value
// is then fed to ROTQMBYIr128_zext_r32 with an immediate of 12, exactly as
// the 16- and 32-bit forms do.
def : Pat<(i128 (zext R8C:$rSrc)),
          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
// anyext 32->128: Zero extend 32-bit to 128-bit
def : Pat<(i128 (anyext R32C:$rSrc)),
(ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
// anyext 64->128: Zero extend 64-bit to 128-bit
def : Pat<(i128 (anyext R64C:$rSrc)),
(ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
// anyext 16->128: Zero extend 16-bit to 128-bit
def : Pat<(i128 (anyext R16C:$rSrc)),
(ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
// anyext 8->128: Any-extend 8-bit to 128-bit, implemented as a zero extension.
// The 8-bit source is masked with 0xff so that all eight source bits are
// kept (the 16-bit form masks with the full-width 0xffff in the same way);
// a 0xf mask would discard bits 4-7 of the source. The masked 32-bit value
// is then fed to ROTQMBYIr128_zext_r32 with an immediate of 12, exactly as
// the 16- and 32-bit forms do.
def : Pat<(i128 (anyext R8C:$rSrc)),
          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
// Shift left
def : Pat<(shl GPRC:$rA, R32C:$rB),
(SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;

View File

@ -33,6 +33,13 @@ def SELBr64_cond:
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
[/* no pattern */]>;
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
// select the negative condition:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
@ -43,13 +50,6 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
Pat<(cond R64C:$rA, R64C:$rB),
(XORIr32 compare.Fragment, -1)>;
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
@ -331,8 +331,8 @@ class v2i64_highprod<dag rA, dag rB>:
(MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
v2i64_mul_ashlq4<rA>.Fragment),
(Av4i32
(MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
v2i64_mul_bhi64<rB>.Fragment),
(MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
v2i64_mul_bhi64<rB>.Fragment),
(Av4i32
(MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
v2i64_mul_bhi64<rB>.Fragment),
@ -381,3 +381,14 @@ def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)>.Fragment>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for f64. Both data operands and the result are
// in the R64FP register class; the selection mask operand ($rC) is taken from
// a 32-bit register (R32C). The attached pattern matches
// (select $rC, $rB, $rA), i.e. the data operands appear in the pattern in the
// reverse order of the instruction's input list.
// NOTE(review): unlike the i64 form, no FSM is applied to $rC here -- confirm
// that the R32C condition already holds a usable select mask.
def SELBf64_cond:
SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
[(set R64FP:$rT,
(select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;

View File

@ -685,26 +685,26 @@ SPUDAGToDAGISel::Select(SDValue Op) {
break;
case MVT::i32:
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x00010203, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x08090a0b, MVT::i32));
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x00010203, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x08090a0b, MVT::i32));
break;
case MVT::i16:
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80800203, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80800a0b, MVT::i32));
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80800203, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80800a0b, MVT::i32));
break;
case MVT::i8:
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80808003, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x8080800b, MVT::i32));
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x80808003, MVT::i32),
CurDAG->getConstant(0x80808080, MVT::i32),
CurDAG->getConstant(0x8080800b, MVT::i32));
break;
}
@ -714,9 +714,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
SDValue(PromoteScalar, 0),
SDValue(PromoteScalar, 0),
SDValue(shufMaskLoad, 0));
SDValue(PromoteScalar, 0),
SDValue(PromoteScalar, 0),
SDValue(shufMaskLoad, 0));
// N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
// re-use it in the VEC2PREFSLOT selection without needing to explicitly
@ -742,6 +742,27 @@ SPUDAGToDAGISel::Select(SDValue Op) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));

View File

@ -92,6 +92,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
// Set RTLIB libcall names as used by SPU:
setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
// Set up the SPU's register classes:
addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
@ -183,6 +186,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
// Make sure that DAGCombine doesn't insert illegal 64-bit constants
setOperationAction(ISD::FABS, MVT::f64, Custom);
// SPU can do rotate right and left, so legalize it... but customize for i8
// because instructions don't exist.
@ -243,6 +249,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i16, Legal);
setOperationAction(ISD::SETCC, MVT::i32, Legal);
setOperationAction(ISD::SETCC, MVT::i64, Legal);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
@ -410,6 +417,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
@ -1552,12 +1562,9 @@ static bool isConstantSplat(const uint64_t Bits128[2],
return false; // Can't be a splat if two pieces don't match.
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
//! Lower a BUILD_VECTOR instruction creatively:
SDValue
SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
@ -1575,6 +1582,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
switch (VT.getSimpleVT()) {
default:
cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
<< VT.getMVTString()
<< "\n";
abort();
/*NOTREACHED*/
case MVT::v4f32: {
uint32_t Value32 = SplatBits;
assert(SplatSize == 4
@ -2188,32 +2200,32 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
//! Generate the carry-generate shuffle mask.
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
SmallVector<SDValue, 16> ShufBytes;
SmallVector<SDValue, 16 > ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
// once the borrow is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
// Create the shuffle mask for "rotating" the borrow up one register slot
// once the borrow is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size());
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size());
}
//! Generate the borrow-generate shuffle mask
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
SmallVector<SDValue, 16> ShufBytes;
SmallVector<SDValue, 16 > ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
// once the borrow is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
// Create the shuffle mask for "rotating" the borrow up one register slot
// once the borrow is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size());
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size());
}
//! Lower byte immediate operations for v16i8 vectors:
@ -2372,6 +2384,83 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
//! Custom lowering for ISD::FABS on f64
/*!
 The absolute value is computed on the integer image of the double: the
 operand is bit-converted to i64, ANDed with a mask that has every bit except
 the i64 sign bit set, and the result is bit-converted back to f64.

 DAGCombine performs the same basic reduction, but it inserts the i64 constant
 itself, which CellSPU then has to legalize; doing the lowering here avoids
 that.

 \param Op  The FABS node; its value type must be MVT::f64
 \param DAG The selection DAG in which the replacement nodes are created
 \return The f64 BIT_CONVERT node that replaces \a Op
 */
static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
  MVT FloatVT = Op.getValueType();
  MVT BitsVT(MVT::i64);

  assert(FloatVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");

  // Reinterpret the double as a 64-bit integer.
  SDValue AsInt = DAG.getNode(ISD::BIT_CONVERT, BitsVT, Op.getOperand(0));
  // Everything but the sign bit.
  SDValue NonSignBits =
    DAG.getConstant(~BitsVT.getIntegerVTSignBit(), BitsVT);
  // Clear the sign bit, then reinterpret as a double again.
  SDValue Cleared = DAG.getNode(ISD::AND, BitsVT, AsInt, NonSignBits);

  return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Cleared);
}
//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering.

 Only the SETO and SETUO condition codes are lowered. Both are turned into a
 signed i64 comparison of the bit pattern of fabs(lhs) against
 0x7ff0000000000001, the smallest IEEE-754 double bit pattern that lies above
 +infinity (0x7ff0000000000000), i.e. the first NaN encoding:

 - SETO  -> (setcc i32, bits(fabs(lhs)), 0x7ff0000000000001, SETLT)
 - SETUO -> (setcc i32, bits(fabs(lhs)), 0x7ff0000000000001, SETGE)

 Every other condition code (SETOEQ, SETOGT, SETOGE, SETOLT, SETOLE, SETONE,
 SETUEQ, SETUGT, SETUGE, SETULT, SETULE, SETUNE, ...) is unimplemented: a
 diagnostic is printed to cerr and the process aborts.

 NOTE(review): only operand 0 (lhs) is examined; operand 1 (rhs) is ignored.
 That matches the generic SETO/SETUO definition only if the caller guarantees
 lhs == rhs or a non-NaN rhs -- confirm against the producers of these nodes.

 \param Op  The SETCC node: operand 0 is the f64 lhs, operand 2 the condition
 \param DAG The selection DAG in which the replacement nodes are created
 \param TLI Target lowering information (not used by this function)
 \return The i32 integer SETCC node that replaces \a Op
 */
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  assert(lhs.getValueType() == MVT::f64
         && "LowerSETCC: type other than MVT::f64\n");

  // Map the floating point condition onto the integer comparison used against
  // the first-NaN bit pattern.
  ISD::CondCode intCC;
  switch (CC->get()) {
  case ISD::SETO:
    // Ordered: |lhs| sorts strictly below the first NaN encoding.
    intCC = ISD::SETLT;
    break;
  case ISD::SETUO:
    // Unordered: |lhs| is at or above the first NaN encoding.
    intCC = ISD::SETGE;
    break;
  default:
    // All remaining ordered/unordered f64 comparisons are not yet lowered.
    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
    abort();
    /*NOTREACHED*/
    return SDValue();
  }

  // Strip the sign bit first so that a signed i64 comparison orders the bit
  // patterns by magnitude.
  SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
  SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);

  return DAG.getSetCC(MVT::i32, i64lhs, posNaN, intCC);
}
//! Lower ISD::SELECT_CC
/*!
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@ -2501,9 +2590,12 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
break;
}
case ISD::FABS:
return LowerFABS(Op, DAG);
// Vector-related lowering.
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
return SPU::LowerBUILD_VECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@ -2530,6 +2622,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG, *this);
case ISD::SETCC:
return LowerSETCC(Op, DAG, *this);
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
}
@ -2656,8 +2751,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
}
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
if (CN->getZExtValue() == 0) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (CN != 0 && CN->getZExtValue() == 0) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
@ -2736,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
break;
}
}
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
@ -2809,41 +2904,18 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
unsigned Depth ) const {
#if 0
const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
#endif
switch (Op.getOpcode()) {
default:
// KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
break;
#if 0
case CALL:
case SHUFB:
case SHUFFLE_MASK:
case CNTB:
#endif
case SPUISD::PREFSLOT2VEC: {
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType();
unsigned Op0VTBits = Op0VT.getSizeInBits();
uint64_t InMask = Op0VT.getIntegerVTBitMask();
KnownZero |= APInt(Op0VTBits, ~InMask, false);
KnownOne |= APInt(Op0VTBits, InMask, false);
break;
}
case SPUISD::PREFSLOT2VEC:
case SPUISD::LDRESULT:
case SPUISD::VEC2PREFSLOT: {
MVT OpVT = Op.getValueType();
unsigned OpVTBits = OpVT.getSizeInBits();
uint64_t InMask = OpVT.getIntegerVTBitMask();
KnownZero |= APInt(OpVTBits, ~InMask, false);
KnownOne |= APInt(OpVTBits, InMask, false);
break;
}
#if 0
case SPUISD::VEC2PREFSLOT:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
case SPUISD::VEC_SHL:
@ -2854,8 +2926,8 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case SPUISD::ROTBYTES_LEFT:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
#endif
}
#endif
}
unsigned

View File

@ -61,7 +61,7 @@ namespace llvm {
};
}
/// Predicates that are used for node matching:
//! Utility functions specific to CellSPU-only:
namespace SPU {
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
MVT ValueType);
@ -78,6 +78,7 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);

View File

@ -134,6 +134,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORi64_v2i64:
case SPU::ORf32_v4f32:
case SPU::ORf64_v2f64:
/*
case SPU::ORi128_r64:
case SPU::ORi128_f64:
case SPU::ORi128_r32:
@ -148,6 +149,8 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORr16_i128:
case SPU::ORr8_i128:
case SPU::ORvec_i128:
*/
/*
case SPU::ORr16_r32:
case SPU::ORr8_r32:
case SPU::ORr32_r16:
@ -158,7 +161,11 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORr64_r32:
case SPU::ORr64_r16:
case SPU::ORr64_r8:
{
*/
case SPU::ORf32_r32:
case SPU::ORr32_f32:
case SPU::ORf64_r64:
case SPU::ORr64_f64: {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&

View File

@ -1259,9 +1259,6 @@ multiclass BitwiseAnd
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* Intentionally does not match a pattern */]>;
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
// Could use v4i32, but won't for clarity
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
@ -1408,12 +1405,12 @@ class ORRegInst<RegisterClass rclass>:
// These are effectively no-ops, but need to exist for proper type conversion
// and type coercion.
class ORCvtForm<dag OOL, dag IOL>
class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
: SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
bits<7> RA;
bits<7> RT;
let Pattern = [/* no pattern */];
let Pattern = pattern;
let Inst{0-10} = 0b10000010000;
let Inst{11-17} = RA;
@ -1427,29 +1424,29 @@ class ORPromoteScalar<RegisterClass rclass>:
class ORExtractElt<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
class ORCvtRegGPRC<RegisterClass rclass>:
ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>;
/* class ORCvtRegGPRC<RegisterClass rclass>:
ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
/* class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; */
class ORCvtGPRCReg<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
/* class ORCvtGPRCReg<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
class ORCvtFormR32Reg<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins R32C:$rA)>;
class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
class ORCvtFormRegR32<RegisterClass rclass>:
ORCvtForm<(outs R32C:$rT), (ins rclass:$rA)>;
class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
class ORCvtFormR64Reg<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins R64C:$rA)>;
class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
class ORCvtFormRegR64<RegisterClass rclass>:
ORCvtForm<(outs R64C:$rT), (ins rclass:$rA)>;
class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
class ORCvtGPRCVec:
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
/* class ORCvtGPRCVec:
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; */
multiclass BitwiseOr
{
@ -1468,10 +1465,11 @@ multiclass BitwiseOr
(v2f64 (bitconvert (or (v2i64 VECREG:$rA),
(v2i64 VECREG:$rB)))))]>;
def r64: ORRegInst<R64C>;
def r32: ORRegInst<R32C>;
def r16: ORRegInst<R16C>;
def r8: ORRegInst<R8C>;
def r128: ORRegInst<GPRC>;
def r64: ORRegInst<R64C>;
def r32: ORRegInst<R32C>;
def r16: ORRegInst<R16C>;
def r8: ORRegInst<R8C>;
// OR instructions used to copy f32 and f64 registers.
def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
@ -1496,6 +1494,7 @@ multiclass BitwiseOr
def f32_v4f32: ORExtractElt<R32FP>;
def f64_v2f64: ORExtractElt<R64FP>;
/*
// Conversion from GPRC to register
def i128_r64: ORCvtRegGPRC<R64C>;
def i128_f64: ORCvtRegGPRC<R64FP>;
@ -1517,7 +1516,8 @@ multiclass BitwiseOr
// Conversion from vector to GPRC
def vec_i128: ORCvtGPRCVec;
*/
/*
// Conversion from register to R32C:
def r16_r32: ORCvtFormRegR32<R16C>;
def r8_r32: ORCvtFormRegR32<R8C>;
@ -1535,6 +1535,18 @@ multiclass BitwiseOr
def r64_r32: ORCvtFormRegR64<R32C>;
def r64_r16: ORCvtFormRegR64<R16C>;
def r64_r8: ORCvtFormRegR64<R8C>;
*/
// bitconvert patterns:
def r32_f32: ORCvtFormR32Reg<R32FP,
[(set R32FP:$rT, (bitconvert R32C:$rA))]>;
def f32_r32: ORCvtFormRegR32<R32FP,
[(set R32C:$rT, (bitconvert R32FP:$rA))]>;
def r64_f64: ORCvtFormR64Reg<R64FP,
[(set R64FP:$rT, (bitconvert R64C:$rA))]>;
def f64_r64: ORCvtFormRegR64<R64FP,
[(set R64C:$rT, (bitconvert R64FP:$rA))]>;
}
defm OR : BitwiseOr;
@ -1960,7 +1972,7 @@ multiclass SelectBits
(v4f32 VECREG:$rB),
(v4f32 VECREG:$rA)))]>;
// SELBr64_cond is defined further down, look for i64 comparisons
// SELBr64_cond is defined in SPU64InstrInfo.td
def r32_cond: SELBRegCondInst<R32C, R32C>;
def f32_cond: SELBRegCondInst<R32C, R32FP>;
def r16_cond: SELBRegCondInst<R16C, R16C>;
@ -2146,14 +2158,6 @@ class SHLHVecInst<ValueType vectype>:
[(set (vectype VECREG:$rT),
(SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
// $rB gets promoted to 32-bit register type when confronted with
// this llvm assembly code:
//
// define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
// %A = shl i16 %arg1, %arg2
// ret i16 %A
// }
multiclass ShiftLeftHalfword
{
def v8i16: SHLHVecInst<v8i16>;
@ -2250,6 +2254,10 @@ class SHLQBIVecInst<ValueType vectype>:
[(set (vectype VECREG:$rT),
(SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
// Scalar register-class form of SHLQBIInst: the result and the first source
// operand share the register class `rclass`, and the second source operand is
// a 32-bit register (R32C). No selection pattern is attached, so this
// instruction is only reachable from explicit Pat<> definitions.
class SHLQBIRegInst<RegisterClass rclass>:
SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
[/* no pattern */]>;
multiclass ShiftLeftQuadByBits
{
def v16i8: SHLQBIVecInst<v16i8>;
@ -2258,6 +2266,8 @@ multiclass ShiftLeftQuadByBits
def v4f32: SHLQBIVecInst<v4f32>;
def v2i64: SHLQBIVecInst<v2i64>;
def v2f64: SHLQBIVecInst<v2f64>;
def r128: SHLQBIRegInst<GPRC>;
}
defm SHLQBI : ShiftLeftQuadByBits;
@ -2335,6 +2345,32 @@ multiclass ShiftLeftQuadBytesImm
defm SHLQBYI : ShiftLeftQuadBytesImm;
// Base class for the "shlqbybi" instruction: an RRForm instruction with
// opcode 0b00111001111, scheduled as RotateShift, printed as
// "shlqbybi $rT, $rA, $rB". Operand lists and the selection pattern are
// supplied by the subclasses.
class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
RotateShift, pattern>;
// Vector form: VECREG result and source, with the second source operand in a
// 32-bit register (R32C). No selection pattern is attached.
// NOTE(review): the `vectype` parameter is not referenced in the body; it only
// distinguishes the per-type instantiations by name.
class SHLQBYBIVecInst<ValueType vectype>:
SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
[/* no pattern */]>;
// Scalar register-class form: result and first source share `rclass`, with
// the second source operand in a 32-bit register (R32C). No selection pattern
// is attached.
class SHLQBYBIRegInst<RegisterClass rclass>:
SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
[/* no pattern */]>;
// Instantiates one shlqbybi variant per vector value type, plus an r128
// variant that operates on the GPRC register class.
multiclass ShiftLeftQuadBytesBitCount
{
def v16i8: SHLQBYBIVecInst<v16i8>;
def v8i16: SHLQBYBIVecInst<v8i16>;
def v4i32: SHLQBYBIVecInst<v4i32>;
def v4f32: SHLQBYBIVecInst<v4f32>;
def v2i64: SHLQBYBIVecInst<v2i64>;
def v2f64: SHLQBYBIVecInst<v2f64>;
def r128: SHLQBYBIRegInst<GPRC>;
}
// Produces SHLQBYBIv16i8 ... SHLQBYBIv2f64 and SHLQBYBIr128.
defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate halfword:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@ -4285,13 +4321,6 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
(ANDfabsvec (v4f32 VECREG:$rA),
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
def : Pat<(fabs R64FP:$rA),
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
def : Pat<(fabs (v2f64 VECREG:$rA)),
(ANDfabsvec (v2f64 VECREG:$rA),
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
//===----------------------------------------------------------------------===//
// Hint for branch instructions:
//===----------------------------------------------------------------------===//

View File

@ -1,9 +1,9 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fsmbi %t1.s | count 3
; RUN: grep fsmbi %t1.s | count 2
; RUN: grep 32768 %t1.s | count 2
; RUN: grep xor %t1.s | count 4
; RUN: grep and %t1.s | count 5
; RUN: grep andbi %t1.s | count 3
; RUN: grep and %t1.s | count 4
; RUN: grep andbi %t1.s | count 2
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"