forked from OSchip/llvm-project
Allow v16i16 and v32i8 shuffles to be rewritten as narrower shuffles.
llvm-svn: 156156
This commit is contained in:
parent
fa0ebcd1b0
commit
42f2182366
|
@ -5920,10 +5920,12 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
|
|||
unsigned Scale;
|
||||
switch (VT.SimpleTy) {
|
||||
default: llvm_unreachable("Unexpected!");
|
||||
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
|
||||
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
|
||||
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
|
||||
case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
|
||||
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
|
||||
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
|
||||
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
|
||||
case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
|
||||
case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
|
||||
case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
|
||||
}
|
||||
|
||||
SmallVector<int, 8> MaskVec;
|
||||
|
@ -6370,7 +6372,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
// If the shuffle can be profitably rewritten as a narrower shuffle, then
|
||||
// do it!
|
||||
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
|
||||
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
|
||||
VT == MVT::v16i16 || VT == MVT::v32i8) {
|
||||
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
|
||||
if (NewOp.getNode())
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
|
||||
|
|
|
@ -202,3 +202,11 @@ define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
|
|||
%t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
ret <4 x i64> %t
|
||||
}
|
||||
|
||||
; CHECK: narrow
|
||||
; CHECK: vpermilps
|
||||
; CHECK: ret
|
||||
; Every aligned pair of adjacent i16 mask entries below is either undef or
; selects an aligned adjacent pair of source elements, so the mask can be
; expressed pairwise as the <8 x i32> mask <1, 0, 3, 2, 5, 4, 7, undef>.
define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
|
||||
%t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
|
||||
ret <16 x i16> %t
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue