forked from OSchip/llvm-project
[DAGCombiner] allow undef shuffle operands when eliminating bitcasts (PR34111)
As noted in the FIXME, this could be improved further, but this is the smallest fix that helps: https://bugs.llvm.org/show_bug.cgi?id=34111 (llvm-svn: 311853)
This commit is contained in:
parent
4e4ba615b2
commit
a7a61d9768
|
@@ -8863,12 +8863,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
|
|||
if (Op.getOpcode() == ISD::BITCAST &&
|
||||
Op.getOperand(0).getValueType() == VT)
|
||||
return SDValue(Op.getOperand(0));
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
|
||||
if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
|
||||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
|
||||
return DAG.getBitcast(VT, Op);
|
||||
return SDValue();
|
||||
};
|
||||
|
||||
// FIXME: If either input vector is bitcast, try to convert the shuffle to
|
||||
// the result type of this bitcast. This would eliminate at least one
|
||||
// bitcast. See the transform in InstCombine.
|
||||
SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
|
||||
SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
|
||||
if (!(SV0 && SV1))
|
||||
|
|
|
@@ -401,18 +401,13 @@ define <2 x float> @haddps_v2f32(<4 x float> %v0) {
|
|||
define <4 x float> @PR34111(<4 x float> %a) {
|
||||
; SSE3-LABEL: PR34111:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movaps %xmm0, %xmm1
|
||||
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||
; SSE3-NEXT: addps %xmm1, %xmm0
|
||||
; SSE3-NEXT: haddps %xmm0, %xmm0
|
||||
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: PR34111:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
|
||||
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||
; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
|
||||
|
|
Loading…
Reference in New Issue