[X86][AVX] Fix failed broadcast fold

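Strip excess BITCASTs from the EXTRACT_SUBVECTOR input so that the source load can be folded into the broadcast (e.g. AVX2/AVX512 now emit a single `vbroadcastss (%rdi), %ymm0` instead of a `vmovaps` load followed by a register broadcast).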

llvm-svn: 320930
This commit is contained in:
Simon Pilgrim 2017-12-16 22:57:17 +00:00
parent 68d7f9da76
commit f3b6da00f5
2 changed files with 11 additions and 19 deletions

View File

@ -10483,9 +10483,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits.
if (SrcVT.getSizeInBits() > 128)
V = extract128BitVector(V, 0, DAG, DL);
// 128-bits, removing as many bitcasts as possible.
if (SrcVT.getSizeInBits() > 128) {
MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
128 / SrcVT.getScalarSizeInBits());
V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
V = DAG.getBitcast(ExtVT, V);
}
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}

View File

@ -526,22 +526,10 @@ define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(<32 x i8
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vbroadcastss (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovaps (%rdi), %ymm0
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovaps (%rdi), %ymm0
; AVX512-NEXT: vbroadcastss %xmm0, %ymm0
; AVX512-NEXT: retq
; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vbroadcastss (%rdi), %ymm0
; AVX-NEXT: retq
entry:
%ld = load <32 x i8>, <32 x i8>* %ptr
%ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>