forked from OSchip/llvm-project
CellSPU:
- Ensure that (operation) legalization emits proper FDIV libcall when needed. - Fix various bugs encountered during llvm-spu-gcc build, along with various cleanups. - Start supporting double precision comparisons for remaining libgcc2 build. Discovered interesting DAGCombiner feature, which is currently solved via custom lowering (64-bit constants are not legal on CellSPU, but DAGCombiner insists on inserting one anyway.) - Update README. llvm-svn: 62664
This commit is contained in:
parent
a70798cc9a
commit
ed7d79fce4
|
@ -3294,6 +3294,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
|
|||
LC = GetFPLibCall(VT, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
|
||||
RTLIB::POW_PPCF128);
|
||||
break;
|
||||
case ISD::FDIV:
|
||||
LC = GetFPLibCall(VT, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80,
|
||||
RTLIB::DIV_PPCF128);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
if (LC != RTLIB::UNKNOWN_LIBCALL) {
|
||||
|
|
|
@ -8,7 +8,7 @@ Department in The Aerospace Corporation:
|
|||
- Mark Thomas (floating point instructions)
|
||||
- Michael AuYeung (intrinsics)
|
||||
- Chandler Carruth (LLVM expertise)
|
||||
- Nehal Desai (debugging, RoadRunner SPU expertise)
|
||||
- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
|
@ -36,7 +36,7 @@ to add 'spu' to configure's --enable-targets option, e.g.:
|
|||
|
||||
TODO:
|
||||
* Create a machine pass for performing dual-pipeline scheduling specifically
|
||||
for CellSPU, handle inserting branch prediction instructions.
|
||||
for CellSPU, and insert branch prediction instructions as needed.
|
||||
|
||||
* i32 instructions:
|
||||
|
||||
|
@ -48,20 +48,43 @@ TODO:
|
|||
* sign and zero extension: done
|
||||
* addition: done
|
||||
* subtraction: needed
|
||||
* multiplication: work-in-progress
|
||||
* multiplication: done
|
||||
|
||||
* i128 support:
|
||||
|
||||
* zero extension: done
|
||||
* zero extension, any extension: done
|
||||
* sign extension: needed
|
||||
* arithmetic operators (add, sub, mul, div): needed
|
||||
* logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
|
||||
|
||||
* Double floating point support
|
||||
* or: done
|
||||
|
||||
This was started. "What's missing?" to be filled in.
|
||||
* f64 support
|
||||
|
||||
* Comparison operators:
|
||||
SETOEQ unimplemented
|
||||
SETOGT unimplemented
|
||||
SETOGE unimplemented
|
||||
SETOLT unimplemented
|
||||
SETOLE unimplemented
|
||||
SETONE unimplemented
|
||||
SETO done (lowered)
|
||||
SETUO done (lowered)
|
||||
SETUEQ unimplemented
|
||||
SETUGT unimplemented
|
||||
SETUGE unimplemented
|
||||
SETULT unimplemented
|
||||
SETULE unimplemented
|
||||
SETUNE unimplemented
|
||||
|
||||
* LLVM vector support
|
||||
|
||||
* VSETCC needs to be implemented. It's pretty straightforward to code, but
|
||||
needs implementation.
|
||||
|
||||
* Intrinsics
|
||||
|
||||
Lots of progress. "What's missing/incomplete?" to be filled in.
|
||||
* spu.h intrinsics added but not tested. Need to have an operational
|
||||
llvm-spu-gcc in order to write a unit test harness.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
//
|
||||
// Cell SPU 128-bit operations
|
||||
//
|
||||
// Primary author: Scott Michel (scottm@aero.org)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// zext 32->128: Zero extend 32-bit to 128-bit
|
||||
|
@ -20,3 +19,23 @@ def : Pat<(i128 (zext R16C:$rSrc)),
|
|||
// zext 8->128: Zero extend 8-bit to 128-bit
|
||||
def : Pat<(i128 (zext R8C:$rSrc)),
|
||||
(ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
|
||||
|
||||
// anyext 32->128: Any-extend 32-bit to 128-bit (implemented as a zero extend)
|
||||
def : Pat<(i128 (anyext R32C:$rSrc)),
|
||||
(ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
|
||||
|
||||
// anyext 64->128: Any-extend 64-bit to 128-bit (implemented as a zero extend)
|
||||
def : Pat<(i128 (anyext R64C:$rSrc)),
|
||||
(ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
|
||||
|
||||
// anyext 16->128: Any-extend 16-bit to 128-bit (implemented as a zero extend)
|
||||
def : Pat<(i128 (anyext R16C:$rSrc)),
|
||||
(ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
|
||||
|
||||
// anyext 8->128: Any-extend 8-bit to 128-bit (implemented as a zero extend)
|
||||
def : Pat<(i128 (anyext R8C:$rSrc)),
|
||||
(ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
|
||||
|
||||
// Shift left
|
||||
def : Pat<(shl GPRC:$rA, R32C:$rB),
|
||||
(SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
|
||||
|
|
|
@ -33,6 +33,13 @@ def SELBr64_cond:
|
|||
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
|
||||
[/* no pattern */]>;
|
||||
|
||||
// The generic i64 select pattern, which assumes that the comparison result
|
||||
// is in a 32-bit register that contains a select mask pattern (i.e., gather
|
||||
// bits result):
|
||||
|
||||
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
|
||||
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
|
||||
|
||||
// select the negative condition:
|
||||
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
|
||||
Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
|
||||
|
@ -43,13 +50,6 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
|
|||
Pat<(cond R64C:$rA, R64C:$rB),
|
||||
(XORIr32 compare.Fragment, -1)>;
|
||||
|
||||
// The generic i64 select pattern, which assumes that the comparison result
|
||||
// is in a 32-bit register that contains a select mask pattern (i.e., gather
|
||||
// bits result):
|
||||
|
||||
def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
|
||||
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
|
||||
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// The i64 seteq fragment that does the scalar->vector conversion and
|
||||
// comparison:
|
||||
|
@ -331,8 +331,8 @@ class v2i64_highprod<dag rA, dag rB>:
|
|||
(MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
|
||||
v2i64_mul_ashlq4<rA>.Fragment),
|
||||
(Av4i32
|
||||
(MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
|
||||
v2i64_mul_bhi64<rB>.Fragment),
|
||||
(MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
|
||||
v2i64_mul_bhi64<rB>.Fragment),
|
||||
(Av4i32
|
||||
(MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
|
||||
v2i64_mul_bhi64<rB>.Fragment),
|
||||
|
@ -381,3 +381,14 @@ def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
|
|||
(v4i32 VECREG:$rCGmask)),
|
||||
v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
|
||||
(v4i32 VECREG:$rCGmask)>.Fragment>;
|
||||
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// f64 comparisons
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
// selb instruction definition for f64. Note that the selection mask is
|
||||
// a vector, produced by various forms of FSM:
|
||||
def SELBf64_cond:
|
||||
SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
|
||||
[(set R64FP:$rT,
|
||||
(select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
|
||||
|
|
|
@ -685,26 +685,26 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
|||
break;
|
||||
case MVT::i32:
|
||||
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x00010203, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x08090a0b, MVT::i32));
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x00010203, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x08090a0b, MVT::i32));
|
||||
break;
|
||||
|
||||
case MVT::i16:
|
||||
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80800203, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80800a0b, MVT::i32));
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80800203, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80800a0b, MVT::i32));
|
||||
break;
|
||||
|
||||
case MVT::i8:
|
||||
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80808003, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x8080800b, MVT::i32));
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x80808003, MVT::i32),
|
||||
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||
CurDAG->getConstant(0x8080800b, MVT::i32));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -714,9 +714,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
|||
|
||||
SDValue zextShuffle =
|
||||
CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
|
||||
SDValue(PromoteScalar, 0),
|
||||
SDValue(PromoteScalar, 0),
|
||||
SDValue(shufMaskLoad, 0));
|
||||
SDValue(PromoteScalar, 0),
|
||||
SDValue(PromoteScalar, 0),
|
||||
SDValue(shufMaskLoad, 0));
|
||||
|
||||
// N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
|
||||
// re-use it in the VEC2PREFSLOT selection without needing to explicitly
|
||||
|
@ -742,6 +742,27 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
|||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||
SDNode *CGLoad =
|
||||
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||
|
||||
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
SDValue(CGLoad, 0)));
|
||||
|
|
|
@ -92,6 +92,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||
setUseUnderscoreSetJmp(true);
|
||||
setUseUnderscoreLongJmp(true);
|
||||
|
||||
// Set RTLIB libcall names as used by SPU:
|
||||
setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
|
||||
|
||||
// Set up the SPU's register classes:
|
||||
addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
|
||||
addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
|
||||
|
@ -183,6 +186,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
|
||||
// Make sure that DAGCombine doesn't insert illegal 64-bit constants
|
||||
setOperationAction(ISD::FABS, MVT::f64, Custom);
|
||||
|
||||
// SPU can do rotate right and left, so legalize it... but customize for i8
|
||||
// because instructions don't exist.
|
||||
|
||||
|
@ -243,6 +249,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||
setOperationAction(ISD::SETCC, MVT::i16, Legal);
|
||||
setOperationAction(ISD::SETCC, MVT::i32, Legal);
|
||||
setOperationAction(ISD::SETCC, MVT::i64, Legal);
|
||||
setOperationAction(ISD::SETCC, MVT::f64, Custom);
|
||||
|
||||
// Custom lower i128 -> i64 truncates
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
||||
|
@ -410,6 +417,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
|
||||
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
|
||||
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
|
||||
"SPUISD::ROTBYTES_LEFT_BITS";
|
||||
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
|
||||
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
|
||||
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
|
||||
|
@ -1552,12 +1562,9 @@ static bool isConstantSplat(const uint64_t Bits128[2],
|
|||
return false; // Can't be a splat if two pieces don't match.
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
// expansion code take care of it. If we CAN select this case, and if it
|
||||
// selects to a single instruction, return Op. Otherwise, if we can codegen
|
||||
// this case more efficiently than a constant pool load, lower it to the
|
||||
// sequence of ops that should be used.
|
||||
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
//! Lower a BUILD_VECTOR instruction creatively:
|
||||
SDValue
|
||||
SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT VT = Op.getValueType();
|
||||
// If this is a vector of constants or undefs, get the bits. A bit in
|
||||
// UndefBits is set if the corresponding element of the vector is an
|
||||
|
@ -1575,6 +1582,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
|||
|
||||
switch (VT.getSimpleVT()) {
|
||||
default:
|
||||
cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
|
||||
<< VT.getMVTString()
|
||||
<< "\n";
|
||||
abort();
|
||||
/*NOTREACHED*/
|
||||
case MVT::v4f32: {
|
||||
uint32_t Value32 = SplatBits;
|
||||
assert(SplatSize == 4
|
||||
|
@ -2188,32 +2200,32 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
|
|||
|
||||
//! Generate the carry-generate shuffle mask.
|
||||
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
|
||||
SmallVector<SDValue, 16> ShufBytes;
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||
// once the borrow is generated.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
// Create the shuffle mask for "rotating" the carry up one register slot
|
||||
// once the carry is generated.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
//! Generate the borrow-generate shuffle mask
|
||||
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
|
||||
SmallVector<SDValue, 16> ShufBytes;
|
||||
SmallVector<SDValue, 16 > ShufBytes;
|
||||
|
||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||
// once the borrow is generated.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||
// once the borrow is generated.
|
||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
&ShufBytes[0], ShufBytes.size());
|
||||
}
|
||||
|
||||
//! Lower byte immediate operations for v16i8 vectors:
|
||||
|
@ -2372,6 +2384,83 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::FABS
|
||||
/*!
|
||||
DAGCombine does the same basic reduction: convert the double to i64 and mask
|
||||
off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
|
||||
CellSPU has to legalize. Hence, the custom lowering.
|
||||
*/
|
||||
|
||||
static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
|
||||
MVT OpVT = Op.getValueType();
|
||||
MVT IntVT(MVT::i64);
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
|
||||
assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
|
||||
|
||||
SDValue iABS =
|
||||
DAG.getNode(ISD::AND, IntVT,
|
||||
DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
|
||||
DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
|
||||
|
||||
return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
|
||||
}
|
||||
|
||||
//! Lower ISD::SETCC
|
||||
/*!
|
||||
This handles MVT::f64 (double floating point) condition lowering
|
||||
*/
|
||||
|
||||
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
SDValue lhs = Op.getOperand(0);
|
||||
SDValue rhs = Op.getOperand(1);
|
||||
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
|
||||
MVT lhsVT = lhs.getValueType();
|
||||
SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
|
||||
|
||||
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
|
||||
assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
|
||||
|
||||
switch (CC->get()) {
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETONE:
|
||||
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
|
||||
abort();
|
||||
break;
|
||||
case ISD::SETO: {
|
||||
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
|
||||
SDValue i64lhs =
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
|
||||
|
||||
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
|
||||
}
|
||||
case ISD::SETUO: {
|
||||
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
|
||||
SDValue i64lhs =
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
|
||||
|
||||
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
|
||||
}
|
||||
case ISD::SETUEQ:
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE:
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE:
|
||||
case ISD::SETUNE:
|
||||
default:
|
||||
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
//! Lower ISD::SELECT_CC
|
||||
/*!
|
||||
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
|
||||
|
@ -2501,9 +2590,12 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||
break;
|
||||
}
|
||||
|
||||
case ISD::FABS:
|
||||
return LowerFABS(Op, DAG);
|
||||
|
||||
// Vector-related lowering.
|
||||
case ISD::BUILD_VECTOR:
|
||||
return LowerBUILD_VECTOR(Op, DAG);
|
||||
return SPU::LowerBUILD_VECTOR(Op, DAG);
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
return LowerSCALAR_TO_VECTOR(Op, DAG);
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
|
@ -2530,6 +2622,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||
case ISD::SELECT_CC:
|
||||
return LowerSELECT_CC(Op, DAG, *this);
|
||||
|
||||
case ISD::SETCC:
|
||||
return LowerSETCC(Op, DAG, *this);
|
||||
|
||||
case ISD::TRUNCATE:
|
||||
return LowerTRUNCATE(Op, DAG);
|
||||
}
|
||||
|
@ -2656,8 +2751,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||
}
|
||||
case SPUISD::IndirectAddr: {
|
||||
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
|
||||
if (CN->getZExtValue() == 0) {
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
if (CN != 0 && CN->getZExtValue() == 0) {
|
||||
// (SPUindirect (SPUaform <addr>, 0), 0) ->
|
||||
// (SPUaform <addr>, 0)
|
||||
|
||||
|
@ -2736,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Otherwise, return unchanged.
|
||||
#ifndef NDEBUG
|
||||
if (Result.getNode()) {
|
||||
|
@ -2809,41 +2904,18 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
|||
unsigned Depth ) const {
|
||||
#if 0
|
||||
const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
|
||||
#endif
|
||||
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
// KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
|
||||
break;
|
||||
|
||||
#if 0
|
||||
case CALL:
|
||||
case SHUFB:
|
||||
case SHUFFLE_MASK:
|
||||
case CNTB:
|
||||
#endif
|
||||
|
||||
case SPUISD::PREFSLOT2VEC: {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
MVT Op0VT = Op0.getValueType();
|
||||
unsigned Op0VTBits = Op0VT.getSizeInBits();
|
||||
uint64_t InMask = Op0VT.getIntegerVTBitMask();
|
||||
KnownZero |= APInt(Op0VTBits, ~InMask, false);
|
||||
KnownOne |= APInt(Op0VTBits, InMask, false);
|
||||
break;
|
||||
}
|
||||
|
||||
case SPUISD::PREFSLOT2VEC:
|
||||
case SPUISD::LDRESULT:
|
||||
case SPUISD::VEC2PREFSLOT: {
|
||||
MVT OpVT = Op.getValueType();
|
||||
unsigned OpVTBits = OpVT.getSizeInBits();
|
||||
uint64_t InMask = OpVT.getIntegerVTBitMask();
|
||||
KnownZero |= APInt(OpVTBits, ~InMask, false);
|
||||
KnownOne |= APInt(OpVTBits, InMask, false);
|
||||
break;
|
||||
}
|
||||
|
||||
#if 0
|
||||
case SPUISD::VEC2PREFSLOT:
|
||||
case SPUISD::SHLQUAD_L_BITS:
|
||||
case SPUISD::SHLQUAD_L_BYTES:
|
||||
case SPUISD::VEC_SHL:
|
||||
|
@ -2854,8 +2926,8 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
|||
case SPUISD::ROTBYTES_LEFT:
|
||||
case SPUISD::SELECT_MASK:
|
||||
case SPUISD::SELB:
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
|
@ -61,7 +61,7 @@ namespace llvm {
|
|||
};
|
||||
}
|
||||
|
||||
/// Predicates that are used for node matching:
|
||||
//! Utility functions specific to CellSPU-only:
|
||||
namespace SPU {
|
||||
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
|
||||
MVT ValueType);
|
||||
|
@ -78,6 +78,7 @@ namespace llvm {
|
|||
|
||||
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
|
||||
const SPUTargetMachine &TM);
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
|
||||
|
||||
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
|
||||
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
|
||||
|
|
|
@ -134,6 +134,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
|||
case SPU::ORi64_v2i64:
|
||||
case SPU::ORf32_v4f32:
|
||||
case SPU::ORf64_v2f64:
|
||||
/*
|
||||
case SPU::ORi128_r64:
|
||||
case SPU::ORi128_f64:
|
||||
case SPU::ORi128_r32:
|
||||
|
@ -148,6 +149,8 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
|||
case SPU::ORr16_i128:
|
||||
case SPU::ORr8_i128:
|
||||
case SPU::ORvec_i128:
|
||||
*/
|
||||
/*
|
||||
case SPU::ORr16_r32:
|
||||
case SPU::ORr8_r32:
|
||||
case SPU::ORr32_r16:
|
||||
|
@ -158,7 +161,11 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
|||
case SPU::ORr64_r32:
|
||||
case SPU::ORr64_r16:
|
||||
case SPU::ORr64_r8:
|
||||
{
|
||||
*/
|
||||
case SPU::ORf32_r32:
|
||||
case SPU::ORr32_f32:
|
||||
case SPU::ORf64_r64:
|
||||
case SPU::ORr64_f64: {
|
||||
assert(MI.getNumOperands() == 2 &&
|
||||
MI.getOperand(0).isReg() &&
|
||||
MI.getOperand(1).isReg() &&
|
||||
|
|
|
@ -1259,9 +1259,6 @@ multiclass BitwiseAnd
|
|||
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
||||
// Could use v4i32, but won't for clarity
|
||||
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* Intentionally does not match a pattern */]>;
|
||||
|
@ -1408,12 +1405,12 @@ class ORRegInst<RegisterClass rclass>:
|
|||
// These are effectively no-ops, but need to exist for proper type conversion
|
||||
// and type coercion.
|
||||
|
||||
class ORCvtForm<dag OOL, dag IOL>
|
||||
class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
|
||||
: SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
|
||||
bits<7> RA;
|
||||
bits<7> RT;
|
||||
|
||||
let Pattern = [/* no pattern */];
|
||||
let Pattern = pattern;
|
||||
|
||||
let Inst{0-10} = 0b10000010000;
|
||||
let Inst{11-17} = RA;
|
||||
|
@ -1427,29 +1424,29 @@ class ORPromoteScalar<RegisterClass rclass>:
|
|||
class ORExtractElt<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
|
||||
|
||||
class ORCvtRegGPRC<RegisterClass rclass>:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>;
|
||||
/* class ORCvtRegGPRC<RegisterClass rclass>:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
|
||||
|
||||
class ORCvtVecGPRC:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
|
||||
/* class ORCvtVecGPRC:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; */
|
||||
|
||||
class ORCvtGPRCReg<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
|
||||
/* class ORCvtGPRCReg<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
|
||||
|
||||
class ORCvtFormR32Reg<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins R32C:$rA)>;
|
||||
class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
|
||||
|
||||
class ORCvtFormRegR32<RegisterClass rclass>:
|
||||
ORCvtForm<(outs R32C:$rT), (ins rclass:$rA)>;
|
||||
class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
|
||||
ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
|
||||
|
||||
class ORCvtFormR64Reg<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins R64C:$rA)>;
|
||||
class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
|
||||
|
||||
class ORCvtFormRegR64<RegisterClass rclass>:
|
||||
ORCvtForm<(outs R64C:$rT), (ins rclass:$rA)>;
|
||||
class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
|
||||
ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
|
||||
|
||||
class ORCvtGPRCVec:
|
||||
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
|
||||
/* class ORCvtGPRCVec:
|
||||
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; */
|
||||
|
||||
multiclass BitwiseOr
|
||||
{
|
||||
|
@ -1468,10 +1465,11 @@ multiclass BitwiseOr
|
|||
(v2f64 (bitconvert (or (v2i64 VECREG:$rA),
|
||||
(v2i64 VECREG:$rB)))))]>;
|
||||
|
||||
def r64: ORRegInst<R64C>;
|
||||
def r32: ORRegInst<R32C>;
|
||||
def r16: ORRegInst<R16C>;
|
||||
def r8: ORRegInst<R8C>;
|
||||
def r128: ORRegInst<GPRC>;
|
||||
def r64: ORRegInst<R64C>;
|
||||
def r32: ORRegInst<R32C>;
|
||||
def r16: ORRegInst<R16C>;
|
||||
def r8: ORRegInst<R8C>;
|
||||
|
||||
// OR instructions used to copy f32 and f64 registers.
|
||||
def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
|
||||
|
@ -1496,6 +1494,7 @@ multiclass BitwiseOr
|
|||
def f32_v4f32: ORExtractElt<R32FP>;
|
||||
def f64_v2f64: ORExtractElt<R64FP>;
|
||||
|
||||
/*
|
||||
// Conversion from GPRC to register
|
||||
def i128_r64: ORCvtRegGPRC<R64C>;
|
||||
def i128_f64: ORCvtRegGPRC<R64FP>;
|
||||
|
@ -1517,7 +1516,8 @@ multiclass BitwiseOr
|
|||
|
||||
// Conversion from vector to GPRC
|
||||
def vec_i128: ORCvtGPRCVec;
|
||||
|
||||
*/
|
||||
/*
|
||||
// Conversion from register to R32C:
|
||||
def r16_r32: ORCvtFormRegR32<R16C>;
|
||||
def r8_r32: ORCvtFormRegR32<R8C>;
|
||||
|
@ -1535,6 +1535,18 @@ multiclass BitwiseOr
|
|||
def r64_r32: ORCvtFormRegR64<R32C>;
|
||||
def r64_r16: ORCvtFormRegR64<R16C>;
|
||||
def r64_r8: ORCvtFormRegR64<R8C>;
|
||||
*/
|
||||
|
||||
// bitconvert patterns:
|
||||
def r32_f32: ORCvtFormR32Reg<R32FP,
|
||||
[(set R32FP:$rT, (bitconvert R32C:$rA))]>;
|
||||
def f32_r32: ORCvtFormRegR32<R32FP,
|
||||
[(set R32C:$rT, (bitconvert R32FP:$rA))]>;
|
||||
|
||||
def r64_f64: ORCvtFormR64Reg<R64FP,
|
||||
[(set R64FP:$rT, (bitconvert R64C:$rA))]>;
|
||||
def f64_r64: ORCvtFormRegR64<R64FP,
|
||||
[(set R64C:$rT, (bitconvert R64FP:$rA))]>;
|
||||
}
|
||||
|
||||
defm OR : BitwiseOr;
|
||||
|
@ -1960,7 +1972,7 @@ multiclass SelectBits
|
|||
(v4f32 VECREG:$rB),
|
||||
(v4f32 VECREG:$rA)))]>;
|
||||
|
||||
// SELBr64_cond is defined further down, look for i64 comparisons
|
||||
// SELBr64_cond is defined in SPU64InstrInfo.td
|
||||
def r32_cond: SELBRegCondInst<R32C, R32C>;
|
||||
def f32_cond: SELBRegCondInst<R32C, R32FP>;
|
||||
def r16_cond: SELBRegCondInst<R16C, R16C>;
|
||||
|
@ -2146,14 +2158,6 @@ class SHLHVecInst<ValueType vectype>:
|
|||
[(set (vectype VECREG:$rT),
|
||||
(SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
|
||||
|
||||
// $rB gets promoted to 32-bit register type when confronted with
|
||||
// this llvm assembly code:
|
||||
//
|
||||
// define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
|
||||
// %A = shl i16 %arg1, %arg2
|
||||
// ret i16 %A
|
||||
// }
|
||||
|
||||
multiclass ShiftLeftHalfword
|
||||
{
|
||||
def v8i16: SHLHVecInst<v8i16>;
|
||||
|
@ -2250,6 +2254,10 @@ class SHLQBIVecInst<ValueType vectype>:
|
|||
[(set (vectype VECREG:$rT),
|
||||
(SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
|
||||
|
||||
class SHLQBIRegInst<RegisterClass rclass>:
|
||||
SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
multiclass ShiftLeftQuadByBits
|
||||
{
|
||||
def v16i8: SHLQBIVecInst<v16i8>;
|
||||
|
@ -2258,6 +2266,8 @@ multiclass ShiftLeftQuadByBits
|
|||
def v4f32: SHLQBIVecInst<v4f32>;
|
||||
def v2i64: SHLQBIVecInst<v2i64>;
|
||||
def v2f64: SHLQBIVecInst<v2f64>;
|
||||
|
||||
def r128: SHLQBIRegInst<GPRC>;
|
||||
}
|
||||
|
||||
defm SHLQBI : ShiftLeftQuadByBits;
|
||||
|
@ -2335,6 +2345,32 @@ multiclass ShiftLeftQuadBytesImm
|
|||
|
||||
defm SHLQBYI : ShiftLeftQuadBytesImm;
|
||||
|
||||
class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
|
||||
RotateShift, pattern>;
|
||||
|
||||
class SHLQBYBIVecInst<ValueType vectype>:
|
||||
SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
class SHLQBYBIRegInst<RegisterClass rclass>:
|
||||
SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
|
||||
[/* no pattern */]>;
|
||||
|
||||
multiclass ShiftLeftQuadBytesBitCount
|
||||
{
|
||||
def v16i8: SHLQBYBIVecInst<v16i8>;
|
||||
def v8i16: SHLQBYBIVecInst<v8i16>;
|
||||
def v4i32: SHLQBYBIVecInst<v4i32>;
|
||||
def v4f32: SHLQBYBIVecInst<v4f32>;
|
||||
def v2i64: SHLQBYBIVecInst<v2i64>;
|
||||
def v2f64: SHLQBYBIVecInst<v2f64>;
|
||||
|
||||
def r128: SHLQBYBIRegInst<GPRC>;
|
||||
}
|
||||
|
||||
defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
|
||||
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// Rotate halfword:
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
@ -4285,13 +4321,6 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
|
|||
(ANDfabsvec (v4f32 VECREG:$rA),
|
||||
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
|
||||
def : Pat<(fabs R64FP:$rA),
|
||||
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
|
||||
|
||||
def : Pat<(fabs (v2f64 VECREG:$rA)),
|
||||
(ANDfabsvec (v2f64 VECREG:$rA),
|
||||
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Hint for branch instructions:
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep fsmbi %t1.s | count 3
|
||||
; RUN: grep fsmbi %t1.s | count 2
|
||||
; RUN: grep 32768 %t1.s | count 2
|
||||
; RUN: grep xor %t1.s | count 4
|
||||
; RUN: grep and %t1.s | count 5
|
||||
; RUN: grep andbi %t1.s | count 3
|
||||
; RUN: grep and %t1.s | count 4
|
||||
; RUN: grep andbi %t1.s | count 2
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
|
|
Loading…
Reference in New Issue