forked from OSchip/llvm-project
CellSPU:
- Incorporate Tilmann Scheller's ISD::TRUNCATE custom lowering patch - Update SPU calling convention info, even if it's not used yet (but can be at some point or another) - Ensure that any-extended f32 loads are custom lowered, especially when they're promoted for use in printf. llvm-svn: 60438
This commit is contained in:
parent
af058b5696
commit
7364025ff8
|
@ -21,6 +21,8 @@ class CCIfSubtarget<string F, CCAction A>
|
|||
|
||||
// Return-value convention for Cell SPU: Everything can be passed back via $3:
|
||||
def RetCC_SPU : CallingConv<[
|
||||
CCIfType<[i8], CCAssignToReg<[R3]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[R3]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[R3]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[R3]>>,
|
||||
CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
|
||||
|
@ -30,30 +32,82 @@ def RetCC_SPU : CallingConv<[
|
|||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CellSPU Argument Calling Conventions
|
||||
// FIXME
|
||||
// (note: this isn't used, but presumably should be at some point when other
|
||||
// targets do.)
|
||||
//===----------------------------------------------------------------------===//
|
||||
/*
|
||||
def CC_SPU : CallingConv<[
|
||||
// The first 8 integer arguments are passed in integer registers.
|
||||
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
|
||||
CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
|
||||
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
|
||||
R12, R13, R14, R15, R16, R17, R18, R19, R20,
|
||||
R21, R22, R23, R24, R25, R26, R27, R28, R29,
|
||||
R30, R31, R32, R33, R34, R35, R36, R37, R38,
|
||||
R39, R40, R41, R42, R43, R44, R45, R46, R47,
|
||||
R48, R49, R50, R51, R52, R53, R54, R55, R56,
|
||||
R57, R58, R59, R60, R61, R62, R63, R64, R65,
|
||||
R66, R67, R68, R69, R70, R71, R72, R73, R74,
|
||||
R75, R76, R77, R78, R79]>>,
|
||||
|
||||
// SPU can pass back arguments in all
|
||||
CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
|
||||
CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
|
||||
// Other sub-targets pass FP values in F1-10.
|
||||
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8, F9,F10]>>,
|
||||
|
||||
// The first 12 Vector arguments are passed in altivec registers.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32],
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
|
||||
/*
|
||||
// Integer/FP values get stored in stack slots that are 8 bytes in size and
|
||||
// 8-byte aligned if there are no more registers to hold them.
|
||||
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
|
||||
|
||||
// Vectors get 16-byte stack slots that are 16-byte aligned.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCAssignToStack<16, 16>>*/
|
||||
CCAssignToStack<16, 16>>
|
||||
]>;
|
||||
*/
|
||||
*/
|
||||
|
|
|
@ -151,6 +151,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
|
||||
|
||||
// SPU constant load actions are custom lowered:
|
||||
setOperationAction(ISD::Constant, MVT::i64, Custom);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
|
@ -277,6 +279,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
|
||||
|
||||
// Custom lower truncates
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
||||
|
||||
// SPU has a legal FP -> signed INT instruction
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
||||
|
@ -782,7 +790,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||
DAG.setRoot(currentRoot);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
return result;
|
||||
/*UNREACHED*/
|
||||
}
|
||||
|
@ -2759,6 +2767,102 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
|
|||
return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
|
||||
}
|
||||
|
||||
//! Custom lower ISD::TRUNCATE
|
||||
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
|
||||
{
|
||||
MVT VT = Op.getValueType();
|
||||
MVT::SimpleValueType simpleVT = VT.getSimpleVT();
|
||||
MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
|
||||
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
MVT Op0VT = Op0.getValueType();
|
||||
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
|
||||
|
||||
SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
|
||||
|
||||
unsigned maskLow;
|
||||
unsigned maskHigh;
|
||||
|
||||
// Create shuffle mask
|
||||
switch (Op0VT.getSimpleVT()) {
|
||||
case MVT::i128:
|
||||
switch (simpleVT) {
|
||||
case MVT::i64:
|
||||
// least significant doubleword of quadword
|
||||
maskHigh = 0x08090a0b;
|
||||
maskLow = 0x0c0d0e0f;
|
||||
break;
|
||||
case MVT::i32:
|
||||
// least significant word of quadword
|
||||
maskHigh = maskLow = 0x0c0d0e0f;
|
||||
break;
|
||||
case MVT::i16:
|
||||
// least significant halfword of quadword
|
||||
maskHigh = maskLow = 0x0e0f0e0f;
|
||||
break;
|
||||
case MVT::i8:
|
||||
// least significant byte of quadword
|
||||
maskHigh = maskLow = 0x0f0f0f0f;
|
||||
break;
|
||||
default:
|
||||
cerr << "Truncation to illegal type!";
|
||||
abort();
|
||||
}
|
||||
break;
|
||||
case MVT::i64:
|
||||
switch (simpleVT) {
|
||||
case MVT::i32:
|
||||
// least significant word of doubleword
|
||||
maskHigh = maskLow = 0x04050607;
|
||||
break;
|
||||
case MVT::i16:
|
||||
// least significant halfword of doubleword
|
||||
maskHigh = maskLow = 0x06070607;
|
||||
break;
|
||||
case MVT::i8:
|
||||
// least significant byte of doubleword
|
||||
maskHigh = maskLow = 0x07070707;
|
||||
break;
|
||||
default:
|
||||
cerr << "Truncation to illegal type!";
|
||||
abort();
|
||||
}
|
||||
break;
|
||||
case MVT::i32:
|
||||
case MVT::i16:
|
||||
switch (simpleVT) {
|
||||
case MVT::i16:
|
||||
// least significant halfword of word
|
||||
maskHigh = maskLow = 0x02030203;
|
||||
break;
|
||||
case MVT::i8:
|
||||
// least significant byte of word/halfword
|
||||
maskHigh = maskLow = 0x03030303;
|
||||
break;
|
||||
default:
|
||||
cerr << "Truncation to illegal type!";
|
||||
abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cerr << "Trying to lower truncation from illegal type!";
|
||||
abort();
|
||||
}
|
||||
|
||||
// Use a shuffle to perform the truncation
|
||||
SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||
DAG.getConstant(maskHigh, MVT::i32),
|
||||
DAG.getConstant(maskLow, MVT::i32),
|
||||
DAG.getConstant(maskHigh, MVT::i32),
|
||||
DAG.getConstant(maskLow, MVT::i32));
|
||||
|
||||
SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
|
||||
PromoteScalar, PromoteScalar, shufMask);
|
||||
|
||||
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
|
||||
DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
|
||||
}
|
||||
|
||||
//! Custom (target-specific) lowering entry point
|
||||
/*!
|
||||
This is where LLVM's DAG selection process calls to do target-specific
|
||||
|
@ -2779,6 +2883,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||
abort();
|
||||
}
|
||||
case ISD::LOAD:
|
||||
case ISD::EXTLOAD:
|
||||
case ISD::SEXTLOAD:
|
||||
case ISD::ZEXTLOAD:
|
||||
return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
|
||||
|
@ -2865,6 +2970,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||
|
||||
case ISD::SELECT_CC:
|
||||
return LowerSELECT_CC(Op, DAG);
|
||||
|
||||
case ISD::TRUNCATE:
|
||||
return LowerTRUNCATE(Op, DAG);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
|
|
@ -1371,13 +1371,6 @@ multiclass BitwiseOrByteImm
|
|||
|
||||
defm ORBI : BitwiseOrByteImm;
|
||||
|
||||
// Truncate i16 -> i8
// ORBItrunc is an ORBIInst variant that takes an R16C source register and a
// u10imm immediate and writes an R8C register; it carries no selection
// pattern of its own. The anonymous Pat below selects it for a trunc of an
// R16C value, passing 0 as the immediate.
|
||||
def ORBItrunc : ORBIInst<(outs R8C:$rT), (ins R16C:$rA, u10imm:$val),
|
||||
[/* empty */]>;
|
||||
|
||||
def : Pat<(trunc R16C:$rSrc),
|
||||
(ORBItrunc R16C:$rSrc, 0)>;
|
||||
|
||||
// OR halfword immediate
|
||||
class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
|
||||
|
@ -1403,13 +1396,6 @@ multiclass BitwiseOrHalfwordImm
|
|||
|
||||
defm ORHI : BitwiseOrHalfwordImm;
|
||||
|
||||
// Truncate i32 -> i16
// ORHItrunc is an ORHIInst variant that takes an R32C source register and a
// u10imm immediate and writes an R16C register; it carries no selection
// pattern of its own. The anonymous Pat below selects it for a trunc of an
// R32C value, passing 0 as the immediate.
|
||||
def ORHItrunc : ORHIInst<(outs R16C:$rT), (ins R32C:$rA, u10imm:$val),
|
||||
[/* empty */]>;
|
||||
|
||||
def : Pat<(trunc R32C:$rSrc),
|
||||
(ORHItrunc R32C:$rSrc, 0)>;
|
||||
|
||||
// Instruction class for the "ori" mnemonic: an RI10Form instruction tagged
// IntegerOp, parameterized by its output operand list (OOL), input operand
// list (IOL) and selection pattern.
class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
|
||||
IntegerOp, pattern>;
|
||||
|
@ -1444,13 +1430,6 @@ multiclass BitwiseOrImm
|
|||
|
||||
defm ORI : BitwiseOrImm;
|
||||
|
||||
// Truncate i64 -> i32
// ORItrunc is an ORIInst variant that takes an R64C source register and a
// u10imm_i32 immediate and writes an R32C register; it carries no selection
// pattern of its own. The anonymous Pat below selects it for a trunc of an
// R64C value, passing 0 as the immediate.
// NOTE(review): a second Pat for (trunc R64C:$rSrc), selecting SHLQBYItrunc64,
// is also visible in this file -- confirm which of the two is intended.
|
||||
def ORItrunc : ORIInst<(outs R32C:$rT), (ins R64C:$rA, u10imm_i32:$val),
|
||||
[/* empty */]>;
|
||||
|
||||
def : Pat<(trunc R64C:$rSrc),
|
||||
(ORItrunc R64C:$rSrc, 0)>;
|
||||
|
||||
// ORX: "or" across the vector: or's $rA's word slots leaving the result in
|
||||
// $rT[0], slots 1-3 are zeroed.
|
||||
//
|
||||
|
@ -2014,13 +1993,6 @@ multiclass ShiftLeftQuadBytesImm
|
|||
|
||||
defm SHLQBYI : ShiftLeftQuadBytesImm;
|
||||
|
||||
// Special form for truncating i64 to i32:
// SHLQBYItrunc64 is a SHLQBYIInst variant that takes an R64C source register
// and a u7imm_i32 immediate and writes an R32C register; it carries no
// selection pattern of its own. The anonymous Pat below selects it for a
// trunc of an R64C value, passing 4 as the immediate.
// NOTE(review): a second Pat for (trunc R64C:$rSrc), selecting ORItrunc, is
// also visible in this file -- confirm which of the two is intended.
|
||||
def SHLQBYItrunc64: SHLQBYIInst<(outs R32C:$rT), (ins R64C:$rA, u7imm_i32:$val),
|
||||
[/* no pattern, see below */]>;
|
||||
|
||||
def : Pat<(trunc R64C:$rSrc),
|
||||
(SHLQBYItrunc64 R64C:$rSrc, 4)>;
|
||||
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// Rotate halfword:
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep shufb %t1.s | count 9
|
||||
; RUN: grep {ilhu.*1799} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*771} %t1.s | count 3
|
||||
; RUN: grep {ilhu.*1543} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*1029} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*515} %t1.s | count 1
|
||||
; RUN: grep {iohl.*1799} %t1.s | count 1
|
||||
; RUN: grep {iohl.*771} %t1.s | count 3
|
||||
; RUN: grep {iohl.*1543} %t1.s | count 2
|
||||
; RUN: grep {iohl.*515} %t1.s | count 1
|
||||
; RUN: grep xsbh %t1.s | count 6
|
||||
; RUN: grep sfh %t1.s | count 5
|
||||
|
||||
; ModuleID = 'trunc.bc'
|
||||
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
; codegen for i128 arguments is not implemented yet on CellSPU
|
||||
; once this changes, uncomment the functions below
|
||||
; and update the expected results accordingly
|
||||
|
||||
;define i8 @trunc_i128_i8(i128 %u) nounwind readnone {
|
||||
;entry:
|
||||
; %0 = trunc i128 %u to i8
|
||||
; ret i8 %0
|
||||
;}
|
||||
;define i16 @trunc_i128_i16(i128 %u) nounwind readnone {
|
||||
;entry:
|
||||
; %0 = trunc i128 %u to i16
|
||||
; ret i16 %0
|
||||
;}
|
||||
;define i32 @trunc_i128_i32(i128 %u) nounwind readnone {
|
||||
;entry:
|
||||
; %0 = trunc i128 %u to i32
|
||||
; ret i32 %0
|
||||
;}
|
||||
;define i64 @trunc_i128_i64(i128 %u) nounwind readnone {
|
||||
;entry:
|
||||
; %0 = trunc i128 %u to i64
|
||||
; ret i64 %0
|
||||
;}
|
||||
|
||||
; Truncates the i64 argument %u to i8 and returns that value minus %v.
define i8 @trunc_i64_i8(i64 %u, i8 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i8
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
}
|
||||
; Truncates the i64 argument %u to i16 and returns that value minus %v.
define i16 @trunc_i64_i16(i64 %u, i16 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i16
|
||||
%1 = sub i16 %0, %v
|
||||
ret i16 %1
|
||||
}
|
||||
; Truncates the i64 argument %u to i32 and returns that value minus %v.
define i32 @trunc_i64_i32(i64 %u, i32 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i32
|
||||
%1 = sub i32 %0, %v
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
; Truncates the i32 argument %u to i8 and returns that value minus %v.
define i8 @trunc_i32_i8(i32 %u, i8 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i32 %u to i8
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
}
|
||||
; Truncates the i32 argument %u to i16 and returns that value minus %v.
define i16 @trunc_i32_i16(i32 %u, i16 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i32 %u to i16
|
||||
%1 = sub i16 %0, %v
|
||||
ret i16 %1
|
||||
}
|
||||
|
||||
; Truncates the i16 argument %u to i8 and returns that value minus %v.
define i8 @trunc_i16_i8(i16 %u, i8 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i16 %u to i8
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
}
|
Loading…
Reference in New Issue