[PowerPC] Improve codegen for vector fp to int widening conversions

We currently do not utilize instructions that convert single-precision
vectors to doubleword integer vectors. These conversions
come up in code occasionally, and this improvement allows us to
open-code some functions that need to be added to altivec.h.
This commit is contained in:
Nemanja Ivanovic 2021-04-21 22:16:35 -05:00
parent 28b6726c4d
commit 092619cf6b
2 changed files with 160 additions and 0 deletions

View File

@ -2899,6 +2899,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
(XXPERMDI $A, $B, 3), 1)))>;
// Signed widening conversion of v4f32 elements 0 and 2 to v2i64. The
// extract + fpextend + fp_to_sint sequence is selected as one XVCVSPSXDS
// applied directly to $A; no permute is needed for this element pair.
// NOTE(review): this looks like the big-endian pattern set (the BE test
// expectations use the bare instruction for elements 0/2) - confirm against
// the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPSXDS $A))>;
// Unsigned widening conversion of v4f32 elements 0 and 2 to v2i64. The
// extract + fpextend + fp_to_uint sequence is selected as one XVCVSPUXDS
// applied directly to $A; no permute is needed for this element pair.
// NOTE(review): presumably part of the big-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPUXDS $A))>;
// Signed widening conversion of v4f32 elements 1 and 3 to v2i64. The input
// is first passed through XXSLDWI $A, $A, 1 (same register for both sources,
// word shift of 1) so that the requested elements sit where XVCVSPSXDS reads
// them, then converted with a single XVCVSPSXDS.
// NOTE(review): presumably part of the big-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
// Unsigned widening conversion of v4f32 elements 1 and 3 to v2i64. The input
// is first passed through XXSLDWI $A, $A, 1 (same register for both sources,
// word shift of 1) so that the requested elements sit where XVCVSPUXDS reads
// them, then converted with a single XVCVSPUXDS.
// NOTE(review): presumably part of the big-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
@ -3008,6 +3024,22 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
(f64 (fpextend (extractelt v4f32:$B, 3))))),
(v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
// Signed widening conversion of v4f32 elements 1 and 3 to v2i64. In this
// pattern set the odd elements are the ones XVCVSPSXDS consumes without any
// rearrangement, so the instruction is applied directly to $A.
// NOTE(review): this looks like the little-endian pattern set (the LE test
// expectations use the bare instruction for elements 1/3) - confirm against
// the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPSXDS $A))>;
// Unsigned widening conversion of v4f32 elements 1 and 3 to v2i64. In this
// pattern set the odd elements are the ones XVCVSPUXDS consumes without any
// rearrangement, so the instruction is applied directly to $A.
// NOTE(review): presumably part of the little-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3)))))),
(v2i64 (XVCVSPUXDS $A))>;
// Signed widening conversion of v4f32 elements 0 and 2 to v2i64. In this
// pattern set the even elements need repositioning first, so the input goes
// through XXSLDWI $A, $A, 1 (same register for both sources, word shift of 1)
// before the single XVCVSPSXDS.
// NOTE(review): presumably part of the little-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_sint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPSXDS (XXSLDWI $A, $A, 1)))>;
// Unsigned widening conversion of v4f32 elements 0 and 2 to v2i64. In this
// pattern set the even elements need repositioning first, so the input goes
// through XXSLDWI $A, $A, 1 (same register for both sources, word shift of 1)
// before the single XVCVSPUXDS.
// NOTE(review): presumably part of the little-endian pattern set - confirm
// against the enclosing Predicates, which are not visible in this hunk.
def : Pat<(v2i64 (fp_to_uint
(build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(f64 (fpextend (extractelt v4f32:$A, 2)))))),
(v2i64 (XVCVSPUXDS (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13S,

View File

@ -6532,3 +6532,131 @@ entry:
%vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
ret <2 x double> %vecinit3
}
; Signed float-to-i64 conversion of elements 1 and 3 of %a, packed into a
; two-element i64 vector. The big-endian expectations need a one-word
; xxsldwi before xvcvspsxds; the little-endian ones use xvcvspsxds alone.
define dso_local <2 x i64> @test_xvcvspsxds13(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspsxds13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvspsxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspsxds13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvspsxds v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspsxds13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvspsxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspsxds13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvspsxds v2, v2
; P8LE-NEXT: blr
entry:
%f1 = extractelement <4 x float> %a, i32 1
%s1 = fptosi float %f1 to i64
%lane0 = insertelement <2 x i64> undef, i64 %s1, i32 0
%f3 = extractelement <4 x float> %a, i32 3
%s3 = fptosi float %f3 to i64
%result = insertelement <2 x i64> %lane0, i64 %s3, i32 1
ret <2 x i64> %result
}
; Unsigned float-to-i64 conversion of elements 1 and 3 of %a, packed into a
; two-element i64 vector. The big-endian expectations need a one-word
; xxsldwi before xvcvspuxds; the little-endian ones use xvcvspuxds alone.
define dso_local <2 x i64> @test_xvcvspuxds13(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspuxds13:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxsldwi vs0, v2, v2, 1
; P9BE-NEXT: xvcvspuxds v2, vs0
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspuxds13:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xvcvspuxds v2, v2
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspuxds13:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 1
; P8BE-NEXT: xvcvspuxds v2, vs0
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspuxds13:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xvcvspuxds v2, v2
; P8LE-NEXT: blr
entry:
%f1 = extractelement <4 x float> %a, i32 1
%u1 = fptoui float %f1 to i64
%lane0 = insertelement <2 x i64> undef, i64 %u1, i32 0
%f3 = extractelement <4 x float> %a, i32 3
%u3 = fptoui float %f3 to i64
%result = insertelement <2 x i64> %lane0, i64 %u3, i32 1
ret <2 x i64> %result
}
; Signed float-to-i64 conversion of elements 0 and 2 of %a, packed into a
; two-element i64 vector. The big-endian expectations use xvcvspsxds alone;
; the little-endian ones need a one-word xxsldwi before it.
define dso_local <2 x i64> @test_xvcvspsxds02(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspsxds02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvspsxds v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspsxds02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvspsxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspsxds02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvspsxds v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspsxds02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvspsxds v2, vs0
; P8LE-NEXT: blr
entry:
%f0 = extractelement <4 x float> %a, i32 0
%s0 = fptosi float %f0 to i64
%lane0 = insertelement <2 x i64> undef, i64 %s0, i32 0
%f2 = extractelement <4 x float> %a, i32 2
%s2 = fptosi float %f2 to i64
%result = insertelement <2 x i64> %lane0, i64 %s2, i32 1
ret <2 x i64> %result
}
; Unsigned float-to-i64 conversion of elements 0 and 2 of %a, packed into a
; two-element i64 vector. The big-endian expectations use xvcvspuxds alone;
; the little-endian ones need a one-word xxsldwi before it.
define dso_local <2 x i64> @test_xvcvspuxds02(<4 x float> %a) local_unnamed_addr {
; P9BE-LABEL: test_xvcvspuxds02:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xvcvspuxds v2, v2
; P9BE-NEXT: blr
;
; P9LE-LABEL: test_xvcvspuxds02:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxsldwi vs0, v2, v2, 1
; P9LE-NEXT: xvcvspuxds v2, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test_xvcvspuxds02:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xvcvspuxds v2, v2
; P8BE-NEXT: blr
;
; P8LE-LABEL: test_xvcvspuxds02:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxsldwi vs0, v2, v2, 1
; P8LE-NEXT: xvcvspuxds v2, vs0
; P8LE-NEXT: blr
entry:
%f0 = extractelement <4 x float> %a, i32 0
%u0 = fptoui float %f0 to i64
%lane0 = insertelement <2 x i64> undef, i64 %u0, i32 0
%f2 = extractelement <4 x float> %a, i32 2
%u2 = fptoui float %f2 to i64
%result = insertelement <2 x i64> %lane0, i64 %u2, i32 1
ret <2 x i64> %result
}