forked from OSchip/llvm-project
[X86][FP16] Replace vXi16 with vXf16 instead of v8f16
Fixes pr52561.

Reviewed By: LuoYuanke

Differential Revision: https://reviews.llvm.org/D114304
This commit is contained in:
parent
203f29b40c
commit
f37d9b4112
|
@ -36249,9 +36249,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
(V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
||||
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
|
||||
Shuffle = X86ISD::VZEXT_MOVL;
|
||||
SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
|
||||
: !Subtarget.hasSSE2() ? MVT::v4f32
|
||||
: MaskVT;
|
||||
if (MaskEltSize == 16)
|
||||
SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
|
||||
else
|
||||
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -36300,9 +36301,10 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
|||
isUndefOrEqual(Mask[0], 0) &&
|
||||
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
|
||||
Shuffle = X86ISD::VZEXT_MOVL;
|
||||
SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
|
||||
: !Subtarget.hasSSE2() ? MVT::v4f32
|
||||
: MaskVT;
|
||||
if (MaskEltSize == 16)
|
||||
SrcVT = DstVT = MaskVT.changeVectorElementType(MVT::f16);
|
||||
else
|
||||
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -2025,3 +2025,39 @@ for.body.preheader: ; preds = %entry
|
|||
for.end: ; preds = %for.body.preheader, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test function named after PR52561 (presumably the regression case for that
; report -- confirm against the bug tracker).
;
; Input: two <16 x i32> vectors. The function carries the attributes
; "min-legal-vector-width"="256" and "prefer-vector-width"="256", and the
; expected assembly below operates on ymm registers for both targets.
;
; Body: adds the splat constant 112 to %a, adds %b, then ANDs the sum with a
; constant mask whose lanes 0 and 15 are 65535 and whose remaining lanes are
; poison.
;
; Expected output (both check prefixes): the low half is finished with a
; vpxor that zeroes an xmm register followed by vmovsh, and the high half is
; masked with a vpand against a constant-pool operand.
; NOTE(review): the vmovsh is presumably the VZEXT_MOVL selected with an f16
; vector type of matching width -- confirm against matchUnaryShuffle.
;
; The assertion lines inside the function are FileCheck directives; do not
; hand-edit them.
define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
|
||||
; X64-LABEL: pr52561:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
|
||||
; X64-NEXT: vpaddd %ymm4, %ymm2, %ymm2
|
||||
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
|
||||
; X64-NEXT: vpaddd %ymm4, %ymm3, %ymm2
|
||||
; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
|
||||
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
|
||||
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: pr52561:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-32, %esp
|
||||
; X86-NEXT: subl $32, %esp
|
||||
; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1
|
||||
; X86-NEXT: vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
|
||||
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm2
|
||||
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
|
||||
; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm1
|
||||
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
|
||||
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
|
||||
; X86-NEXT: movl %ebp, %esp
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%1 = add <16 x i32> %a, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
|
||||
%2 = add <16 x i32> %1, %b
|
||||
|
||||
%3 = and <16 x i32> %2, <i32 65535, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 65535>
|
||||
ret <16 x i32> %3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue