forked from OSchip/llvm-project
[LegalizeVectorTypes] When widening the operands of a concat_vectors, use the widened operand 0 directly if its widened type matches the result type and all other operands are undef.
This saves a conversion to element extracts and a build_vector. We already do this when both the result and the input need to be widened to the same type. This changed the sse-intrinsics-fast-isel test because we don't lower (insert_vector_elt (scalar_to_vector X), Y, 1) well: we turn it into (vector_shuffle (scalar_to_vector X), (scalar_to_vector Y), <0, 4, 2, 3>), losing track of the fact that the upper elements could be undef. We should probably find a way to prevent the scalarization of the <2 x f32> load in these tests. llvm-svn: 344404
This commit is contained in:
parent
05f014a684
commit
1bb0c6041a
|
@ -3794,20 +3794,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
|
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
|
||||||
// If the input vector is not legal, it is likely that we will not find a
|
|
||||||
// legal vector of the same size. Replace the concatenate vector with a
|
|
||||||
// nasty build vector.
|
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
EVT EltVT = VT.getVectorElementType();
|
EVT EltVT = VT.getVectorElementType();
|
||||||
|
EVT InVT = N->getOperand(0).getValueType();
|
||||||
SDLoc dl(N);
|
SDLoc dl(N);
|
||||||
|
|
||||||
|
// If the widen width for this operand is the same as the width of the concat
|
||||||
|
// and all but the first operand is undef, just use the widened operand.
|
||||||
|
unsigned NumOperands = N->getNumOperands();
|
||||||
|
if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
|
||||||
|
unsigned i;
|
||||||
|
for (i = 1; i < NumOperands; ++i)
|
||||||
|
if (!N->getOperand(i).isUndef())
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (i == NumOperands)
|
||||||
|
return GetWidenedVector(N->getOperand(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, fall back to a nasty build vector.
|
||||||
unsigned NumElts = VT.getVectorNumElements();
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
SmallVector<SDValue, 16> Ops(NumElts);
|
SmallVector<SDValue, 16> Ops(NumElts);
|
||||||
|
|
||||||
EVT InVT = N->getOperand(0).getValueType();
|
|
||||||
unsigned NumInElts = InVT.getVectorNumElements();
|
unsigned NumInElts = InVT.getVectorNumElements();
|
||||||
|
|
||||||
unsigned Idx = 0;
|
unsigned Idx = 0;
|
||||||
unsigned NumOperands = N->getNumOperands();
|
|
||||||
for (unsigned i=0; i < NumOperands; ++i) {
|
for (unsigned i=0; i < NumOperands; ++i) {
|
||||||
SDValue InOp = N->getOperand(i);
|
SDValue InOp = N->getOperand(i);
|
||||||
assert(getTypeAction(InOp.getValueType()) ==
|
assert(getTypeAction(InOp.getValueType()) ==
|
||||||
|
|
|
@ -1320,10 +1320,10 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
|
||||||
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
|
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
|
||||||
; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04]
|
; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04]
|
||||||
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
|
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
|
||||||
; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
|
; X86-SSE-NEXT: shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00]
|
||||||
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; X86-SSE-NEXT: # xmm2 = xmm2[0,0],xmm1[0,0]
|
||||||
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
|
; X86-SSE-NEXT: shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24]
|
||||||
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
|
; X86-SSE-NEXT: # xmm0 = xmm0[0,1],xmm2[2,0]
|
||||||
; X86-SSE-NEXT: retl # encoding: [0xc3]
|
; X86-SSE-NEXT: retl # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
; X86-AVX1-LABEL: test_mm_loadh_pi:
|
; X86-AVX1-LABEL: test_mm_loadh_pi:
|
||||||
|
@ -1378,14 +1378,14 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
|
||||||
; X86-SSE-LABEL: test_mm_loadl_pi:
|
; X86-SSE-LABEL: test_mm_loadl_pi:
|
||||||
; X86-SSE: # %bb.0:
|
; X86-SSE: # %bb.0:
|
||||||
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||||
; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08]
|
; X86-SSE-NEXT: movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10]
|
||||||
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
|
|
||||||
; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04]
|
|
||||||
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
|
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
|
||||||
; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
|
; X86-SSE-NEXT: movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04]
|
||||||
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
|
||||||
; X86-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4]
|
; X86-SSE-NEXT: shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00]
|
||||||
; X86-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3]
|
; X86-SSE-NEXT: # xmm1 = xmm1[0,0],xmm2[0,0]
|
||||||
|
; X86-SSE-NEXT: shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2]
|
||||||
|
; X86-SSE-NEXT: # xmm1 = xmm1[2,0],xmm0[2,3]
|
||||||
; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
|
; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
|
||||||
; X86-SSE-NEXT: retl # encoding: [0xc3]
|
; X86-SSE-NEXT: retl # encoding: [0xc3]
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue