Teach DAG combine to fold (extract_subvec (concat v1, ..) i) to v_i

- If the extracted vector has the same type as all of the vectors being
  concatenated together, it should be simplified directly into v_i, where i is
  the index of the element being extracted.

llvm-svn: 166125
This commit is contained in:
Michael Liao 2012-10-17 20:48:33 +00:00
parent 7a9f0c09de
commit 7a442c8031
2 changed files with 34 additions and 2 deletions

View File

@ -8610,8 +8610,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants with the same type.
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@ -8628,6 +8628,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
if (V->getOpcode() == ISD::CONCAT_VECTORS) {
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
if (V->getOperand(i).getValueType() != NVT)
return SDValue();
unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
return V->getOperand(Idx / NumElems);
}
return SDValue();
}

View File

@ -0,0 +1,17 @@
; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; Regression test for the DAG combine that folds an extract_subvector of a
; concat_vectors into the matching concatenated operand.
; @foo converts four floats to four bytes, overwrites the last byte with -1,
; and stores the result through %out.
define void @foo(<4 x float> %in, <4 x i8>* %out) {
; Float -> unsigned 32-bit integer lanes, then narrow each lane to 16 bits.
%t0 = fptoui <4 x float> %in to <4 x i32>
%t1 = trunc <4 x i32> %t0 to <4 x i16>
; Widen to 8 lanes: mask indices 0-3 select %t1, indices 4-7 select the undef
; second operand. NOTE(review): presumably this is what becomes the
; concat_vectors node during selection - confirm against the DAG dump.
%t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t3 = trunc <8 x i16> %t2 to <8 x i8>
; Take the low 4 lanes back out of the 8-lane vector. NOTE(review): presumably
; this is the extract_subvector that the new combine is meant to fold - confirm.
%t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Replace lane 3 with the constant -1 before storing.
%t5 = insertelement <4 x i8> %t4, i8 -1, i32 3
store <4 x i8> %t5, <4 x i8>* %out
ret void
; Expected output, in order: the function label, a cvttps2dq conversion, no
; pextrd before the pshufb shuffle, and finally the return.
; CHECK: foo
; CHECK: cvttps2dq
; CHECK-NOT: pextrd
; CHECK: pshufb
; CHECK: ret
}