forked from OSchip/llvm-project
[X86] Use 0x9 instead of 0x1 as the immediate in some masked floor patterns. Similarly, change 0x2 to 0xA for ceil.
Setting bit 3 of the immediate suppresses the precision (inexact) exception, which is what we should be doing for ceil and floor. We already use the correct immediate in the patterns without masking. llvm-svn: 360915
This commit is contained in:
parent
8779b74db1
commit
f09b9d419f
|
@ -9410,13 +9410,13 @@ multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move
|
|||
}
|
||||
|
||||
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
|
||||
v4f32x_info, fp32imm0, 0x01, HasAVX512>;
|
||||
v4f32x_info, fp32imm0, 0x09, HasAVX512>;
|
||||
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
|
||||
v4f32x_info, fp32imm0, 0x02, HasAVX512>;
|
||||
v4f32x_info, fp32imm0, 0x0A, HasAVX512>;
|
||||
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
|
||||
v2f64x_info, fp64imm0, 0x01, HasAVX512>;
|
||||
v2f64x_info, fp64imm0, 0x09, HasAVX512>;
|
||||
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
|
||||
v2f64x_info, fp64imm0, 0x02, HasAVX512>;
|
||||
v2f64x_info, fp64imm0, 0x0A, HasAVX512>;
|
||||
|
||||
|
||||
//-------------------------------------------------
|
||||
|
|
|
@ -1374,7 +1374,7 @@ define <4 x float> @floor_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w
|
|||
; AVX512-LABEL: floor_mask_ss:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
|
@ -1415,7 +1415,7 @@ define <4 x float> @floor_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwi
|
|||
; AVX512-LABEL: floor_maskz_ss:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
%nmask = icmp eq i8 %mask, 0
|
||||
|
@ -1452,7 +1452,7 @@ define <2 x double> @floor_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double
|
|||
; AVX512-LABEL: floor_mask_sd:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
|
@ -1493,7 +1493,7 @@ define <2 x double> @floor_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nou
|
|||
; AVX512-LABEL: floor_maskz_sd:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
%nmask = icmp eq i8 %mask, 0
|
||||
|
@ -1530,7 +1530,7 @@ define <4 x float> @floor_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x flo
|
|||
; AVX512-LABEL: floor_mask_ss_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
|
@ -1573,7 +1573,7 @@ define <4 x float> @floor_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k)
|
|||
; AVX512-LABEL: floor_maskz_ss_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
%s = extractelement <4 x float> %x, i64 0
|
||||
|
@ -1609,7 +1609,7 @@ define <2 x double> @floor_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x
|
|||
; AVX512-LABEL: floor_mask_sd_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
|
@ -1652,7 +1652,7 @@ define <2 x double> @floor_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16
|
|||
; AVX512-LABEL: floor_maskz_sd_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
%s = extractelement <2 x double> %x, i64 0
|
||||
|
@ -1684,7 +1684,7 @@ define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x flo
|
|||
; AVX512-LABEL: floor_mask_ss_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <4 x float> %x, %y
|
||||
|
@ -1717,7 +1717,7 @@ define <4 x float> @floor_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwin
|
|||
; AVX512-LABEL: floor_maskz_ss_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <4 x float> %x, %y
|
||||
%mask = extractelement <4 x i1> %mask1, i64 0
|
||||
|
@ -1750,7 +1750,7 @@ define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x
|
|||
; AVX512-LABEL: floor_mask_sd_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <2 x double> %x, %y
|
||||
|
@ -1783,7 +1783,7 @@ define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) noun
|
|||
; AVX512-LABEL: floor_maskz_sd_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <2 x double> %x, %y
|
||||
%mask = extractelement <2 x i1> %mask1, i64 0
|
||||
|
@ -2350,7 +2350,7 @@ define <4 x float> @ceil_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w,
|
|||
; AVX512-LABEL: ceil_mask_ss:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
|
@ -2391,7 +2391,7 @@ define <4 x float> @ceil_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwin
|
|||
; AVX512-LABEL: ceil_maskz_ss:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
%nmask = icmp eq i8 %mask, 0
|
||||
|
@ -2428,7 +2428,7 @@ define <2 x double> @ceil_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double>
|
|||
; AVX512-LABEL: ceil_mask_sd:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
|
@ -2469,7 +2469,7 @@ define <2 x double> @ceil_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) noun
|
|||
; AVX512-LABEL: ceil_maskz_sd:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = and i8 %k, 1
|
||||
%nmask = icmp eq i8 %mask, 0
|
||||
|
@ -2506,7 +2506,7 @@ define <4 x float> @ceil_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x floa
|
|||
; AVX512-LABEL: ceil_mask_ss_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
|
@ -2549,7 +2549,7 @@ define <4 x float> @ceil_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k)
|
|||
; AVX512-LABEL: ceil_maskz_ss_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
%s = extractelement <4 x float> %x, i64 0
|
||||
|
@ -2585,7 +2585,7 @@ define <2 x double> @ceil_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x d
|
|||
; AVX512-LABEL: ceil_mask_sd_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
|
@ -2628,7 +2628,7 @@ define <2 x double> @ceil_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %
|
|||
; AVX512-LABEL: ceil_maskz_sd_trunc:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: kmovw %edi, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask = trunc i16 %k to i1
|
||||
%s = extractelement <2 x double> %x, i64 0
|
||||
|
@ -2660,7 +2660,7 @@ define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x floa
|
|||
; AVX512-LABEL: ceil_mask_ss_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovaps %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <4 x float> %x, %y
|
||||
|
@ -2693,7 +2693,7 @@ define <4 x float> @ceil_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind
|
|||
; AVX512-LABEL: ceil_maskz_ss_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <4 x float> %x, %y
|
||||
%mask = extractelement <4 x i1> %mask1, i64 0
|
||||
|
@ -2726,7 +2726,7 @@ define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x d
|
|||
; AVX512-LABEL: ceil_mask_sd_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512-NEXT: vmovapd %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <2 x double> %x, %y
|
||||
|
@ -2759,7 +2759,7 @@ define <2 x double> @ceil_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounw
|
|||
; AVX512-LABEL: ceil_maskz_sd_mask8:
|
||||
; AVX512: ## %bb.0:
|
||||
; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1
|
||||
; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%mask1 = fcmp oeq <2 x double> %x, %y
|
||||
%mask = extractelement <2 x i1> %mask1, i64 0
|
||||
|
|
Loading…
Reference in New Issue