[X86][AVX] combineExtractSubvector - 'little to big' extract_subvector(bitcast()) support
Ideally this needs to be a generic combine in DAGCombiner::visitEXTRACT_SUBVECTOR, but there are some nasty regressions on AArch64 due to NEON shuffles not handling bitcasts at all.

llvm-svn: 364407
parent e821e79fce
commit c0711af7f9
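The combine only fires when the bitcast's element-count ratio evenly divides both the extracted width and the constant extraction index. As a rough illustration, here is a minimal standalone C++ sketch of that index-scaling arithmetic; the helper name and the driver are hypothetical, not part of the patch, and the target-legality check the real DAG combine also performs is omitted.

#include <cstdio>

// Sketch of the arithmetic behind:
//   extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
// SrcNumElts:  element count of X (the pre-bitcast vector)
// DestNumElts: element count of the bitcast result ('little to big', so
//              DestNumElts is a whole multiple of SrcNumElts)
// ExtNumElts:  element count of the extracted subvector
// Index:       constant extraction index, in units of the bitcast type
static bool scaleExtractIndex(unsigned SrcNumElts, unsigned DestNumElts,
                              unsigned ExtNumElts, unsigned Index,
                              unsigned &NewExtNumElts, unsigned &NewIndex) {
  if (DestNumElts % SrcNumElts != 0)
    return false;                      // not a 'little to big' bitcast
  unsigned Ratio = DestNumElts / SrcNumElts;
  if (ExtNumElts % Ratio != 0 || Index % Ratio != 0)
    return false;                      // extract must cover whole source elements
  NewExtNumElts = ExtNumElts / Ratio;  // width of the new extract, in source elements
  NewIndex = Index / Ratio;            // scaled extraction index
  return true;
}

int main() {
  // Example: extracting v8i32 at index 8 from (v16i32 bitcast (v8i64 X))
  // becomes a v4i64 extract_subvector of X at index 4, bitcast back to v8i32.
  unsigned NewExtNumElts = 0, NewIndex = 0;
  if (scaleExtractIndex(/*SrcNumElts=*/8, /*DestNumElts=*/16,
                        /*ExtNumElts=*/8, /*Index=*/8, NewExtNumElts, NewIndex))
    std::printf("new extract: %u source elements at index %u\n",
                NewExtNumElts, NewIndex);
  return 0;
}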
@@ -43630,6 +43630,34 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
         VT, SDLoc(N),
         InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));

+  // Try to move vector bitcast after extract_subv by scaling extraction index:
+  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+  // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR
+  if (InVec.getOpcode() == ISD::BITCAST &&
+      InVec.getOperand(0).getValueType().isVector()) {
+    SDValue SrcOp = InVec.getOperand(0);
+    EVT SrcVT = SrcOp.getValueType();
+    unsigned SrcNumElts = SrcVT.getVectorNumElements();
+    unsigned DestNumElts = InVec.getValueType().getVectorNumElements();
+    if ((DestNumElts % SrcNumElts) == 0) {
+      unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+      if ((VT.getVectorNumElements() % DestSrcRatio) == 0) {
+        unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio;
+        EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+                                        SrcVT.getScalarType(), NewExtNumElts);
+        if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+            TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+          unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+          SDLoc DL(N);
+          SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+          SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+                                           SrcOp, NewIndex);
+          return DAG.getBitcast(VT, NewExtract);
+        }
+      }
+    }
+  }
+
   // If we're extracting from a broadcast then we're better off just
   // broadcasting to the smaller type directly, assuming this is the only use.
   // As its a broadcast we don't care about the extraction index.
@@ -382,7 +382,7 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpbroadcastq 24(%rdi), %ymm3
+; AVX2-NEXT: vpbroadcastq 24(%rdi), %xmm3
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]

@@ -395,7 +395,7 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero
 ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastq 24(%rsi), %ymm2
+; AVX2-NEXT: vpbroadcastq 24(%rsi), %xmm2
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
 ; AVX2-NEXT: vpaddd %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
@@ -472,7 +472,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper

@@ -1139,7 +1139,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper
@@ -475,7 +475,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper

@@ -1143,7 +1143,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper
@@ -565,7 +565,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper

@@ -1290,7 +1290,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X86-AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper
@@ -503,7 +503,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper

@@ -1192,7 +1192,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
 ; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
 ; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
-; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
 ; X86-AVX2-NEXT: vzeroupper
@@ -144,14 +144,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-LABEL: test_v4i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -168,14 +168,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BW-LABEL: test_v4i64:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -336,14 +336,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -639,14 +639,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -144,14 +144,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-LABEL: test_v4i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -168,14 +168,14 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BW-LABEL: test_v4i64:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BW-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -336,14 +336,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2

@@ -639,14 +639,14 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
@@ -671,23 +671,13 @@ define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
 ; SSE41-NEXT: movq %xmm2, %rax
 ; SSE41-NEXT: retq
 ;
-; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX2-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: andl $32768, %eax # imm = 0x8000
+; AVX-NEXT: retq
 0:
   %1 = load <8 x i8>, <8 x i8>* %x
   %2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>