AArch64: fix LowerCONCAT_VECTORS for new CodeGen.

The function was making too many assumptions about its input:

1. The NEON_VDUP optimisation was far too aggressive, assuming (I
think) that the input would always be BUILD_VECTOR.

2. We were treating most unknown concats as legal (by returning Op
rather than SDValue()). I think only concats of pairs of vectors are
actually legal.

http://llvm.org/PR19094

llvm-svn: 203450
This commit is contained in:
Tim Northover 2014-03-10 09:34:07 +00:00
parent e32cd191f0
commit 2a661f3f73
2 changed files with 32 additions and 10 deletions

View File

@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// We custom lower concat_vectors with 4, 8, or 16 operands that are all the
// same operand and of type v1* using the DUP instruction.
unsigned NumOps = Op->getNumOperands();
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
if (NumOps == 2) {
assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
return Op;
}
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
return SDValue();
// Must be a single value for VDUP.
bool isConstant = true;
SDValue Op0 = Op.getOperand(0);
for (unsigned i = 1; i < NumOps; ++i) {
SDValue OpN = Op.getOperand(i);
if (Op0 != OpN)
return Op;
if (!isa<ConstantSDNode>(OpN->getOperand(0)))
isConstant = false;
return SDValue();
}
// Verify the value type.
@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
default: llvm_unreachable("Unexpected number of operands");
case 4:
if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
return Op;
return SDValue();
break;
case 8:
if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
return Op;
return SDValue();
break;
case 16:
if (EltVT != MVT::v1i8)
return Op;
return SDValue();
break;
}
SDLoc DL(Op);
EVT VT = Op.getValueType();
// VDUP produces better code for constants.
if (isConstant)
if (Op0->getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
DAG.getConstant(0, MVT::i64));

View File

@ -45,3 +45,24 @@ for.body130.us.us: ; preds = %for.body130.us.us,
br label %for.body130.us.us
}
declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
; Codegen smoke test added with the LowerCONCAT_VECTORS fix (PR19094).
; Builds a <1 x i16> value from a NEON intrinsic and broadcasts its single
; element to all eight lanes of an <8 x i16> result.
; NOTE(review): presumably this reaches LowerCONCAT_VECTORS as a concat of
; identical v1i16 operands whose source is not a BUILD_VECTOR -- confirm
; against the DAG dump.
; The FileCheck directives below only require that the function is emitted
; and contains a return; no particular instruction sequence is pinned.
define <8 x i16> @test_splat(i32 %l) nounwind {
; CHECK-LABEL: test_splat:
; CHECK: ret
; Wrap the scalar argument in a one-element vector for the intrinsic.
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
; The intrinsic call yields a <1 x i16>; the immediate operand is 11.
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
; An all-zero mask selects element 0 of %shift for every result lane.
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %vec
}
; Companion to test_splat, added with the LowerCONCAT_VECTORS fix (PR19094).
; The shuffle mask here is only a partial splat: lanes 0-3 are undef and
; lanes 4-7 select element 0 of the <1 x i16> intrinsic result.
; NOTE(review): presumably this exercises the path where the concat operands
; are not all the same value -- confirm against the DAG dump.
; Parameters %a and %b are never used in the body.
; The FileCheck directives below only require that the function is emitted
; and contains a return; no particular instruction sequence is pinned.
define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
; CHECK-LABEL: test_notsplat:
; CHECK: ret
entry:
; Wrap the scalar argument in a one-element vector for the intrinsic.
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
; The intrinsic call yields a <1 x i16>; the immediate operand is 11.
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
; Low four lanes are undef; high four lanes all take element 0 of %shift.
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %vec
}