forked from OSchip/llvm-project
Don't introduce illegal types when creating vmull operations. <rdar://11324364>
ARM BUILD_VECTORs created after type legalization cannot use i8 or i16 operands, since those types are not legal. Instead use i32 operands, which will be implicitly truncated by the BUILD_VECTOR to match the element type. llvm-svn: 155824
This commit is contained in:
parent
d5ead14365
commit
9245c93656
|
@ -4792,7 +4792,9 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
|
|||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
|
||||
const APInt &CInt = C->getAPIntValue();
|
||||
Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
|
||||
// Element types smaller than 32 bits are not legal, so use i32 elements.
|
||||
// The values are implicitly truncated so sext vs. zext doesn't matter.
|
||||
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
|
||||
}
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
|
||||
|
|
|
@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
|
|||
%3 = extractelement <8 x i16> %2, i32 0
|
||||
ret i16 %3
|
||||
}
|
||||
|
||||
; A constant build_vector created for a vmull with half-width elements must
|
||||
; not introduce illegal types (i8/i16 operands; i32 operands are used instead). <rdar://problem/11324364>
|
||||
define void @vmull_buildvector() nounwind optsize ssp align 2 {
|
||||
; CHECK: vmull_buildvector
; NOTE(review): the only CHECK directive visible for this function matches its
; name, so presumably the test just requires that code generation completes
; without crashing -- confirm against the RUN line at the top of the file.
; Most operands below are undef; the instruction mix (zext'ed <8 x i8> values
; multiplied as <8 x i16>, plus constant i16 splat vectors) is what matters.
|
||||
entry:
|
||||
br i1 undef, label %for.end179, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.cond.loopexit: ; preds = %for.body33, %for.body
|
||||
br i1 undef, label %for.end179, label %for.body
|
||||
|
||||
for.body: ; preds = %for.cond.loopexit, %for.body.lr.ph
|
||||
br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph
|
||||
|
||||
for.body33.lr.ph: ; preds = %for.body
|
||||
%.sub = select i1 undef, i32 0, i32 undef
|
||||
br label %for.body33
|
||||
|
||||
for.body33: ; preds = %for.body33, %for.body33.lr.ph
|
||||
%add45 = add i32 undef, undef
|
||||
%vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
|
||||
%0 = load i32** undef, align 4
|
||||
%shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
|
||||
%1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
|
||||
; Zero-extend <8 x i8> to <8 x i16>; %mul.i223 below multiplies this value by itself.
%vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
|
||||
%shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
|
||||
%shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1>
|
||||
%2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8>
|
||||
%3 = bitcast <16 x i8> undef to <2 x i64>
|
||||
%vmovl.i237 = zext <8 x i8> undef to <8 x i16>
|
||||
%shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
|
||||
%shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
|
||||
%vmovl.i225 = zext <8 x i8> undef to <8 x i16>
|
||||
%mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249
|
||||
%vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
|
||||
%vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind
|
||||
; Multiply by a constant <8 x i16> splat of 80.
; NOTE(review): presumably one of the multiplies in this block is the one
; lowered to vmull with a constant build_vector operand -- confirm which.
%mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>
|
||||
%vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130
|
||||
%sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134
|
||||
%add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96>
|
||||
%add.i198 = add <8 x i16> %add.i200, %sub.i203
|
||||
; Multiply the accumulated <8 x i16> value by the zero-extended %vmovl.i237.
%mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237
|
||||
%mul.i191 = mul <8 x i16> %vshr_n130, undef
|
||||
%add.i192 = add <8 x i16> %mul.i191, %mul.i194
|
||||
%mul.i187 = mul <8 x i16> %vshr_n134, undef
|
||||
%add.i188 = add <8 x i16> %mul.i187, %add.i192
|
||||
%mul.i185 = mul <8 x i16> undef, undef
|
||||
%add.i186 = add <8 x i16> %mul.i185, undef
|
||||
%vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
|
||||
%vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
|
||||
%mul.i184 = mul <8 x i16> undef, %vrshr_n160
|
||||
%mul.i181 = mul <8 x i16> undef, %vmovl.i225
|
||||
%add.i182 = add <8 x i16> %mul.i181, %mul.i184
|
||||
%vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>)
|
||||
%vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind
|
||||
%4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64>
|
||||
%shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||
%5 = bitcast <2 x i64> %shuffle.i to <16 x i8>
|
||||
; The store keeps the narrowed result live so the preceding chain has a use.
store <16 x i8> %5, <16 x i8>* undef, align 16
|
||||
%add177 = add nsw i32 undef, 16
|
||||
br i1 undef, label %for.body33, label %for.cond.loopexit
|
||||
|
||||
for.end179: ; preds = %for.cond.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
|
||||
|
|
Loading…
Reference in New Issue