[X86][FP16] Replace vXi16 with vXf16 instead of v8f16

Fixes pr52561

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D114304
Author: Phoebe Wang
Date: 2021-12-05 19:17:12 +08:00 (committed by Phoebe Wang)
Commit: f37d9b4112 (parent 203f29b40c)
2 changed files with 44 additions and 6 deletions

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -36249,9 +36249,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
         (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
       Shuffle = X86ISD::VZEXT_MOVL;
-      SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
-                      : !Subtarget.hasSSE2() ? MVT::v4f32
-                                             : MaskVT;
+      if (MaskEltSize == 16)
+        SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+      else
+        SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
       return true;
     }
   }
@@ -36300,9 +36301,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
-    SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
-                    : !Subtarget.hasSSE2() ? MVT::v4f32
-                                           : MaskVT;
+    if (MaskEltSize == 16)
+      SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
+    else
+      SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
     return true;
   }
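
Both hunks make the same fix: matchUnaryShuffle previously hardcoded MVT::v8f16 whenever the mask element size was 16, but under "min-legal-vector-width"="256" the shuffle type reaching this code in pr52561 is v16i16, so the hardcoded 128-bit v8f16 no longer matched the 256-bit operand width. MaskVT.changeVectorElementType(MVT::f16) keeps the lane count and only swaps the element type. Below is a minimal standalone sketch of the before/after type choice; SimpleVT and changeElementTypeToF16 are made-up stand-ins for MVT and MVT::changeVectorElementType, not LLVM APIs.

// Standalone model (not LLVM code) of the type selection in this patch.
#include <cassert>
#include <cstdio>

struct SimpleVT {
  const char *EltName; // element type name, e.g. "i16" or "f16"
  unsigned NumElts;    // lane count
  unsigned EltBits;    // bits per lane
  unsigned sizeInBits() const { return NumElts * EltBits; }
};

// Same lane count, element type swapped to the 16-bit float type,
// mirroring MaskVT.changeVectorElementType(MVT::f16).
static SimpleVT changeElementTypeToF16(SimpleVT VT) {
  return {"f16", VT.NumElts, 16};
}

int main() {
  // The shuffle type reaching matchUnaryShuffle in pr52561: v16i16.
  SimpleVT MaskVT = {"i16", 16, 16};

  SimpleVT Old = {"f16", 8, 16};                 // hardcoded v8f16 (128 bits)
  SimpleVT New = changeElementTypeToF16(MaskVT); // v16f16 (256 bits)

  std::printf("old: v%u%s, %u bits\n", Old.NumElts, Old.EltName, Old.sizeInBits());
  std::printf("new: v%u%s, %u bits\n", New.NumElts, New.EltName, New.sizeInBits());

  // The fix preserves the vector width; the hardcoded type did not.
  assert(New.sizeInBits() == MaskVT.sizeInBits());
  assert(Old.sizeInBits() != MaskVT.sizeInBits());
  return 0;
}

Run, this prints "old: v8f16, 128 bits" versus "new: v16f16, 256 bits"; only the latter preserves the width of the v16i16 input.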


@@ -2025,3 +2025,39 @@ for.body.preheader:                               ; preds = %entry
 for.end:                                          ; preds = %for.body.preheader, %entry
   ret void
 }
+
+define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
+; X64-LABEL: pr52561:
+; X64:       # %bb.0:
+; X64-NEXT:    vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
+; X64-NEXT:    vpaddd %ymm4, %ymm2, %ymm2
+; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT:    vpaddd %ymm4, %ymm3, %ymm2
+; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr52561:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-32, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    vpaddd 8(%ebp), %ymm1, %ymm1
+; X86-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
+; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm2
+; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+  %1 = add <16 x i32> %a, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+  %2 = add <16 x i32> %1, %b
+  %3 = and <16 x i32> %2, <i32 65535, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 65535>
+  ret <16 x i32> %3
+}
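
The vmovsh in both expected outputs is the lowering of the X86ISD::VZEXT_MOVL node matched above: with %xmm2 zeroed by the preceding vpxor, the result keeps only the low half-precision lane of %xmm0 and zeroes every other lane. A small illustrative sketch of those lane semantics (plain C++, not LLVM code):

// Models VZEXT_MOVL on 16 x 16-bit lanes: keep lane 0, zero lanes 1..15.
// This is the shuffle pattern matchUnaryShuffle recognizes
// (Mask[0] == 0, remaining mask entries undef or zero).
#include <array>
#include <cstdint>
#include <cstdio>

static std::array<uint16_t, 16> vzextMovl(const std::array<uint16_t, 16> &Src) {
  std::array<uint16_t, 16> Out{}; // value-initialized: all lanes zero
  Out[0] = Src[0];                // move the low element
  return Out;
}

int main() {
  std::array<uint16_t, 16> V{};
  for (unsigned I = 0; I != 16; ++I)
    V[I] = static_cast<uint16_t>(I + 1);

  std::array<uint16_t, 16> R = vzextMovl(V);
  std::printf("lane0=%d lane1=%d lane15=%d\n", R[0], R[1], R[15]); // 1 0 0
  return 0;
}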