forked from OSchip/llvm-project
SelectionDAG: Optimize expansion of vec_type = BITCAST scalar_type
The legalizer can now do this type of expansion for more type combinations without loading and storing to and from the stack. NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195398
This commit is contained in:
parent
9cbd2c5581
commit
06c67bcbe4
|
@ -731,6 +731,12 @@ private:
|
|||
GetExpandedFloat(Op, Lo, Hi);
|
||||
}
|
||||
|
||||
|
||||
/// This function will split the integer \p Op into \p NumElements
|
||||
/// operations of type \p EltVT and store them in \p Ops.
|
||||
void IntegerToVector(SDValue Op, unsigned NumElements,
|
||||
SmallVectorImpl<SDValue> &Ops, EVT EltVT);
|
||||
|
||||
// Generic Result Expansion.
|
||||
void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
|
||||
SDValue &Lo, SDValue &Hi);
|
||||
|
|
|
@ -306,6 +306,25 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
|
|||
// Generic Operand Expansion.
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
||||
/// Recursively break the integer \p Op into pieces and append each piece,
/// bitcast to \p EltVT, to the end of \p Ops.
///
/// \param Op          Integer-typed value to decompose (asserted below).
/// \param NumElements Number of pieces requested. Each recursion level halves
///                    this count and recurses once per half of \p Op.
/// \param Ops         Output list; pieces are appended, existing entries are
///                    left untouched.
/// \param EltVT       Type each leaf piece is bitcast to.
///
/// NOTE(review): the count is halved with a right shift, so a \p NumElements
/// that is not a power of two yields fewer than \p NumElements entries
/// (e.g. 3 produces 2). Callers appear to be expected to pass a power of
/// two -- confirm.
void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
|
||||
SmallVectorImpl<SDValue> &Ops,
|
||||
EVT EltVT) {
|
||||
// Only integer inputs are supported.
assert(Op.getValueType().isInteger());
|
||||
SDLoc DL(Op);
|
||||
SDValue Parts[2];
|
||||
|
||||
if (NumElements > 1) {
|
||||
// Each half of Op is responsible for half of the requested pieces.
NumElements >>= 1;
|
||||
// Presumably Parts[0] receives the low half and Parts[1] the high half;
// SplitInteger is defined elsewhere -- verify.
SplitInteger(Op, Parts[0], Parts[1]);
|
||||
// On big-endian targets the halves are emitted in the opposite order.
if (TLI.isBigEndian())
|
||||
std::swap(Parts[0], Parts[1]);
|
||||
IntegerToVector(Parts[0], NumElements, Ops, EltVT);
|
||||
IntegerToVector(Parts[1], NumElements, Ops, EltVT);
|
||||
} else {
|
||||
// Leaf of the recursion: reinterpret the remaining integer as EltVT.
Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
|
||||
}
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
|
||||
SDLoc dl(N);
|
||||
if (N->getValueType(0).isVector()) {
|
||||
|
@ -314,21 +333,27 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
|
|||
// instead, but only if the new vector type is legal (otherwise there
|
||||
// is no point, and it might create expansion loops). For example, on
|
||||
// x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
|
||||
//
|
||||
// FIXME: I'm not sure why we are first trying to split the input into
|
||||
// a 2 element vector, so I'm leaving it here to maintain the current
|
||||
// behavior.
|
||||
unsigned NumElts = 2;
|
||||
EVT OVT = N->getOperand(0).getValueType();
|
||||
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
|
||||
2);
|
||||
|
||||
if (isTypeLegal(NVT)) {
|
||||
SDValue Parts[2];
|
||||
GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
|
||||
|
||||
if (TLI.isBigEndian())
|
||||
std::swap(Parts[0], Parts[1]);
|
||||
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
|
||||
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
|
||||
NumElts);
|
||||
if (!isTypeLegal(NVT)) {
|
||||
// If we can't find a legal type by splitting the integer in half,
|
||||
// then we can use the node's value type.
|
||||
NumElts = N->getValueType(0).getVectorNumElements();
|
||||
NVT = N->getValueType(0);
|
||||
}
|
||||
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
|
||||
|
||||
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts);
|
||||
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
|
||||
}
|
||||
|
||||
// Otherwise, store to a temporary and load out again as the new type.
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
; XXX: Merge this test into vselect.ll once SI supports 64-bit select.
|
||||
|
||||
; CHECK-LABEL: @test_select_v4i64
|
||||
; Make sure the vectors aren't being stored on the stack. We know they are
|
||||
; being stored on the stack if the shader uses at least 10 registers.
|
||||
; CHECK-NOT: {{\**}} MOV T{{[0-9][0-9]}}.X
|
||||
; Per-lane select between two constant <4 x i64> vectors, driven by whether
; the matching i32 lane of %c is non-zero; the selected vector is stored to
; %out in address space 1.
define void @test_select_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> %c) {
|
||||
entry:
|
||||
%cmp = icmp ne <4 x i32> %c, <i32 0, i32 0, i32 0, i32 0>
|
||||
%result = select <4 x i1> %cmp, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> <i64 4, i64 5, i64 6, i64 7>
|
||||
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue