forked from OSchip/llvm-project
[X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext_load.
If we're zeroing the other elements then we don't need the broadcast.
This commit is contained in:
parent
0bec7e47d0
commit
70e4fb8a53
|
@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
|
||||||
VT.getVectorElementType(),
|
VT.getVectorElementType(),
|
||||||
LN->getPointerInfo(),
|
LN->getPointerInfo(),
|
||||||
LN->getAlignment(),
|
LN->getAlignment(),
|
||||||
MachineMemOperand::MOLoad);
|
LN->getMemOperand()->getFlags());
|
||||||
|
DCI.CombineTo(N, VZLoad);
|
||||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
||||||
return VZLoad;
|
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||||
|
return SDValue(N, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast and
|
||||||
|
// can just use a VZEXT_LOAD.
|
||||||
|
// FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
|
||||||
|
if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
|
||||||
|
N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
|
||||||
|
auto *LN = cast<MemSDNode>(N->getOperand(0));
|
||||||
|
if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
|
||||||
|
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
||||||
|
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
|
||||||
|
SDValue VZLoad =
|
||||||
|
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
|
||||||
|
LN->getMemoryVT(), LN->getMemOperand());
|
||||||
|
DCI.CombineTo(N, VZLoad);
|
||||||
|
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
|
||||||
|
DCI.recursivelyDeleteUnusedNodes(LN);
|
||||||
|
return SDValue(N, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||||
; X32-AVX-NEXT: andl $-128, %esp
|
; X32-AVX-NEXT: andl $-128, %esp
|
||||||
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
|
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
|
||||||
; X32-AVX-NEXT: movl 40(%ebp), %ecx
|
; X32-AVX-NEXT: movl 40(%ebp), %ecx
|
||||||
; X32-AVX-NEXT: vpbroadcastq 32(%ebp), %ymm0
|
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||||
; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
|
||||||
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
|
; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
|
||||||
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
|
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||||
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
|
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
|
||||||
; X32-AVX-NEXT: andl $31, %eax
|
; X32-AVX-NEXT: andl $31, %eax
|
||||||
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
|
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
|
||||||
|
|
Loading…
Reference in New Issue