[X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of legal. Fix infinite loop in op legalization when promotion requires 2 steps.

Previously we had an isel pattern to add the truncate. Instead use Promote to add the truncate to the DAG before isel.

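The Promote legalization code had to be updated to prevent an infinite loop when promotion takes multiple steps: each loop iteration widened the original type again instead of widening the previously tried type, so the loop never advanced past the first widened type.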

llvm-svn: 319259
This commit is contained in:
Craig Topper 2017-11-28 23:56:02 +00:00
parent 3f749c2d4b
commit 88ffb5d4d5
5 changed files with 10 additions and 15 deletions

View File

@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
EVT NewVT;
EVT NewVT = VT;
unsigned NewOpc;
while (true) {
NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;

View File

@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);

View File

@ -7890,11 +7890,6 @@ defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))),
(VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))),
(VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG

View File

@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL: # BB#0:
; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL: # BB#0:
; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: retq
%res = fptoui <16 x float> %f to <16 x i16>

View File

@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; GENERIC-LABEL: f32to16uc:
; GENERIC: # BB#0:
; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdb %zmm0, %xmm0
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16uc:
; SKX: # BB#0:
; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; GENERIC-LABEL: f32to16us:
; GENERIC: # BB#0:
; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdw %zmm0, %ymm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16us:
; SKX: # BB#0:
; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = fptoui <16 x float> %f to <16 x i16>