forked from OSchip/llvm-project
AArch64: fix LowerCONCAT_VECTORS for new CodeGen.
The function was making too many assumptions about its input: (1) The NEON_VDUP optimisation was far too aggressive, assuming (I think) that the input would always be a BUILD_VECTOR. (2) We were treating most unknown concats as legal (by returning Op rather than SDValue()), whereas I think only concats of pairs of vectors are actually legal. See http://llvm.org/PR19094. llvm-svn: 203450
This commit is contained in:
parent
e32cd191f0
commit
2a661f3f73
|
@ -2281,19 +2281,20 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
|||
// We custom lower concat_vectors with 4, 8, or 16 operands that are all the
|
||||
// same operand and of type v1* using the DUP instruction.
|
||||
unsigned NumOps = Op->getNumOperands();
|
||||
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
|
||||
if (NumOps == 2) {
|
||||
assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
|
||||
return Op;
|
||||
}
|
||||
|
||||
if (NumOps != 4 && NumOps != 8 && NumOps != 16)
|
||||
return SDValue();
|
||||
|
||||
// Must be a single value for VDUP.
|
||||
bool isConstant = true;
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
for (unsigned i = 1; i < NumOps; ++i) {
|
||||
SDValue OpN = Op.getOperand(i);
|
||||
if (Op0 != OpN)
|
||||
return Op;
|
||||
|
||||
if (!isa<ConstantSDNode>(OpN->getOperand(0)))
|
||||
isConstant = false;
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Verify the value type.
|
||||
|
@ -2302,22 +2303,22 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
|||
default: llvm_unreachable("Unexpected number of operands");
|
||||
case 4:
|
||||
if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
|
||||
return Op;
|
||||
return SDValue();
|
||||
break;
|
||||
case 8:
|
||||
if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
|
||||
return Op;
|
||||
return SDValue();
|
||||
break;
|
||||
case 16:
|
||||
if (EltVT != MVT::v1i8)
|
||||
return Op;
|
||||
return SDValue();
|
||||
break;
|
||||
}
|
||||
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
// VDUP produces better code for constants.
|
||||
if (isConstant)
|
||||
if (Op0->getOpcode() == ISD::BUILD_VECTOR)
|
||||
return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
|
||||
return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
|
|
|
@ -45,3 +45,24 @@ for.body130.us.us: ; preds = %for.body130.us.us,
|
|||
br label %for.body130.us.us
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)
|
||||
|
||||
define <8 x i16> @test_splat(i32 %l) nounwind {
|
||||
; CHECK-LABEL: test_splat:
|
||||
; CHECK: ret
|
||||
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
|
||||
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
|
||||
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %vec
|
||||
}
|
||||
|
||||
|
||||
define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind {
|
||||
; CHECK-LABEL: test_notsplat:
|
||||
; CHECK: ret
|
||||
entry:
|
||||
%lhs = insertelement <1 x i32> undef, i32 %l, i32 0
|
||||
%shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11)
|
||||
%vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <8 x i16> %vec
|
||||
}
|
Loading…
Reference in New Issue