[PowerPC] Add codegen for VSX word extract convert to FP

Add codegen for VSX word extract followed by conversion of the signed/unsigned
integer to single/double precision floating point.

For UINT_TO_FP:
Extract word unsigned and convert to float was implemented in https://reviews.llvm.org/D20239.
Here we add the missing extract integer and conversion to double. This
utilizes the new P9 instruction xxextractuw to extract an integer element
when the result will be converted to double, thereby saving 2 direct moves
(VSR <-> GPR).

For SINT_TO_FP:
We will implement the following sequence which will also reduce the number of
instructions by saving 2 direct moves.

v4i32->f32:
        xxspltw
        xvcvsxwsp
        xscvspdpn

v4i32->f64:
        xxspltw
        xvcvsxwdp

Differential Revision: https://reviews.llvm.org/D35859

llvm-svn: 310866
This commit is contained in:
Lei Huang 2017-08-14 18:09:29 +00:00
parent 0f87dbee4e
commit 451ef4adcd
3 changed files with 245 additions and 0 deletions

View File

@ -2550,6 +2550,44 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
UseVSXReg;
} // mayStore
// Signed word extract followed by int-to-FP conversion (little endian).
// Each pattern matches PPCfcfids (f32 result) or PPCfcfid (f64 result) applied
// to PPCmtvsra of an i32 extracted from the v4i32 operand $A at a constant
// index, and selects a splat of that word (XXSPLTW) followed by a vector
// signed-word convert instead. In this block element index i is paired with
// XXSPLTW index 3 - i.
// NOTE(review): this `let` lists only IsLittleEndian, while the unsigned block
// further down uses [IsLittleEndian, HasP9Vector]. If this block is nested in
// the `Predicates = [HasP9Vector]` scope shown in the hunk header, the inner
// `let` presumably replaces that list rather than extending it -- confirm the
// missing subtarget feature predicate is intended.
let Predicates = [IsLittleEndian] in {
// f32 results: splat the word, convert all lanes with XVCVSXWSP, then
// XSCVSPDPN produces the scalar f32.
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
// f64 results: splat the word, convert with XVCVSXWDP, and copy the result
// into the VSFRC register class to use it as a scalar f64.
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
}
// Signed word extract followed by int-to-FP conversion (big endian).
// Each pattern matches PPCfcfids (f32 result) or PPCfcfid (f64 result) applied
// to PPCmtvsra of an i32 extracted from the v4i32 operand $A at a constant
// index, and selects a splat of that word (XXSPLTW) followed by a vector
// signed-word convert instead. In this block element index i is paired with
// the same XXSPLTW index i.
// NOTE(review): this `let` lists only IsBigEndian and no subtarget feature
// predicate. If this block is nested in the `Predicates = [HasP9Vector]` scope
// shown in the hunk header, the inner `let` presumably replaces that list
// rather than extending it -- confirm this is intended.
let Predicates = [IsBigEndian] in {
// f32 results: splat the word, convert all lanes with XVCVSXWSP, then
// XSCVSPDPN produces the scalar f32.
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
// f64 results: splat the word, convert with XVCVSXWDP, and copy the result
// into the VSFRC register class to use it as a scalar f64.
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
}
// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
@ -2560,6 +2598,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
// Unsigned word extract converted to f64: PPCfcfidu of PPCmtvsrz of an i32
// taken from v4i32 $A is selected as XXEXTRACTUW feeding XSCVUXDDP. Element
// index i is paired with XXEXTRACTUW immediate 12 - 4*i.
// NOTE(review): the enclosing `let` is not fully visible in this hunk;
// presumably this is the little-endian block -- verify.
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
@ -2587,6 +2633,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
// Unsigned word extract converted to f64: PPCfcfidu of PPCmtvsrz of an i32
// taken from v4i32 $A is selected as XXEXTRACTUW feeding XSCVUXDDP. Element
// index i is paired with XXEXTRACTUW immediate 4*i.
// NOTE(review): the enclosing `let` is not visible in this hunk; presumably
// this is the big-endian block -- verify.
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),

View File

@ -439,6 +439,69 @@ entry:
ret float %conv
}
; Verify we generate optimal code for unsigned vector int elem extract followed
; by conversion to double
; Element 0, unsigned i32 -> double. The expected xxextractuw immediate is 12
; under the default prefix and 0 under the BE prefix; xscvuxddp must consume
; the extracted register in both cases.
define double @conv2dlbTestui0(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dlbTestui0
; CHECK: xxextractuw [[SW:[0-9]+]], 34, 12
; CHECK: xscvuxddp 1, [[SW]]
; CHECK-BE-LABEL: conv2dlbTestui0
; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 0
; CHECK-BE: xscvuxddp 1, [[CP]]
%0 = extractelement <4 x i32> %a, i32 0
%1 = uitofp i32 %0 to double
ret double %1
}
; Element 1, unsigned i32 -> double. The expected xxextractuw immediate is 8
; under the default prefix and 4 under the BE prefix.
define double @conv2dlbTestui1(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dlbTestui1
; CHECK: xxextractuw [[SW:[0-9]+]], 34, 8
; CHECK: xscvuxddp 1, [[SW]]
; CHECK-BE-LABEL: conv2dlbTestui1
; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 4
; CHECK-BE: xscvuxddp 1, [[CP]]
%0 = extractelement <4 x i32> %a, i32 1
%1 = uitofp i32 %0 to double
ret double %1
}
; Element 2, unsigned i32 -> double. The expected xxextractuw immediate is 4
; under the default prefix and 8 under the BE prefix.
define double @conv2dlbTestui2(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dlbTestui2
; CHECK: xxextractuw [[SW:[0-9]+]], 34, 4
; CHECK: xscvuxddp 1, [[SW]]
; CHECK-BE-LABEL: conv2dlbTestui2
; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 8
; CHECK-BE: xscvuxddp 1, [[CP]]
%0 = extractelement <4 x i32> %a, i32 2
%1 = uitofp i32 %0 to double
ret double %1
}
; Element 3, unsigned i32 -> double. The expected xxextractuw immediate is 0
; under the default prefix and 12 under the BE prefix.
define double @conv2dlbTestui3(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dlbTestui3
; CHECK: xxextractuw [[SW:[0-9]+]], 34, 0
; CHECK: xscvuxddp 1, [[SW]]
; CHECK-BE-LABEL: conv2dlbTestui3
; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 12
; CHECK-BE: xscvuxddp 1, [[CP]]
%0 = extractelement <4 x i32> %a, i32 3
%1 = uitofp i32 %0 to double
ret double %1
}
; Verify we don't crash when the extracted element index is a variable
; (%elem) rather than a constant, for unsigned i32 -> double. This function
; carries no output expectations; it only has to compile.
define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
entry:
%vecext = extractelement <4 x i32> %a, i32 %elem
%conv = uitofp i32 %vecext to double
ret double %conv
}
define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
entry:
; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_

View File

@ -105,3 +105,131 @@ entry:
%1 = uitofp i64 %0 to float
ret float %1
}
; Element 0, signed i32 -> float. Expected sequence: xxspltw, xvcvsxwsp,
; xscvspdpn. The xxspltw index is 3 under the default prefix and 0 under the
; BE prefix.
define float @conv2fltTesti0(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2fltTesti0
; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
; CHECK: xvcvsxwsp [[SW]], [[SW]]
; CHECK: xscvspdpn 1, [[SW]]
; CHECK-BE-LABEL: conv2fltTesti0
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
; CHECK-BE: xscvspdpn 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sitofp i32 %vecext to float
ret float %conv
}
; Element 1, signed i32 -> float. The xxspltw index is 2 under the default
; prefix and 1 under the BE prefix.
define float @conv2fltTesti1(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2fltTesti1
; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
; CHECK: xvcvsxwsp [[SW]], [[SW]]
; CHECK: xscvspdpn 1, [[SW]]
; CHECK-BE-LABEL: conv2fltTesti1
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
; CHECK-BE: xscvspdpn 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 1
%conv = sitofp i32 %vecext to float
ret float %conv
}
; Element 2, signed i32 -> float. The xxspltw index is 1 under the default
; prefix and 2 under the BE prefix.
define float @conv2fltTesti2(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2fltTesti2
; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
; CHECK: xvcvsxwsp [[SW]], [[SW]]
; CHECK: xscvspdpn 1, [[SW]]
; CHECK-BE-LABEL: conv2fltTesti2
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
; CHECK-BE: xscvspdpn 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 2
%conv = sitofp i32 %vecext to float
ret float %conv
}
; Element 3, signed i32 -> float. The xxspltw index is 0 under the default
; prefix and 3 under the BE prefix.
define float @conv2fltTesti3(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2fltTesti3
; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
; CHECK: xvcvsxwsp [[SW]], [[SW]]
; CHECK: xscvspdpn 1, [[SW]]
; CHECK-BE-LABEL: conv2fltTesti3
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
; CHECK-BE: xscvspdpn 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 3
%conv = sitofp i32 %vecext to float
ret float %conv
}
; Verify we don't crash when the extracted element index is a variable
; (%elem) rather than a constant, for signed i32 -> float. This function
; carries no output expectations; it only has to compile.
define float @conv2fltTestiVar(<4 x i32> %a, i32 zeroext %elem) {
entry:
%vecext = extractelement <4 x i32> %a, i32 %elem
%conv = sitofp i32 %vecext to float
ret float %conv
}
; Element 0, signed i32 -> double. Expected sequence: xxspltw then xvcvsxwdp.
; The xxspltw index is 3 under the default prefix and 0 under the BE prefix.
define double @conv2dblTesti0(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dblTesti0
; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
; CHECK: xvcvsxwdp 1, [[SW]]
; CHECK-BE-LABEL: conv2dblTesti0
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
; CHECK-BE: xvcvsxwdp 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sitofp i32 %vecext to double
ret double %conv
}
; Element 1, signed i32 -> double. The xxspltw index is 2 under the default
; prefix and 1 under the BE prefix.
define double @conv2dblTesti1(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dblTesti1
; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
; CHECK: xvcvsxwdp 1, [[SW]]
; CHECK-BE-LABEL: conv2dblTesti1
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
; CHECK-BE: xvcvsxwdp 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 1
%conv = sitofp i32 %vecext to double
ret double %conv
}
; Element 2, signed i32 -> double. The xxspltw index is 1 under the default
; prefix and 2 under the BE prefix.
define double @conv2dblTesti2(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dblTesti2
; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
; CHECK: xvcvsxwdp 1, [[SW]]
; CHECK-BE-LABEL: conv2dblTesti2
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
; CHECK-BE: xvcvsxwdp 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 2
%conv = sitofp i32 %vecext to double
ret double %conv
}
; Element 3, signed i32 -> double. The xxspltw index is 0 under the default
; prefix and 3 under the BE prefix.
define double @conv2dblTesti3(<4 x i32> %a) {
entry:
; CHECK-LABEL: conv2dblTesti3
; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
; CHECK: xvcvsxwdp 1, [[SW]]
; CHECK-BE-LABEL: conv2dblTesti3
; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
; CHECK-BE: xvcvsxwdp 1, [[CP]]
%vecext = extractelement <4 x i32> %a, i32 3
%conv = sitofp i32 %vecext to double
ret double %conv
}
; Verify we don't crash when the extracted element index is a variable
; (%elem) rather than a constant, for signed i32 -> double. This function
; carries no output expectations; it only has to compile.
define double @conv2dblTestiVar(<4 x i32> %a, i32 zeroext %elem) {
entry:
%vecext = extractelement <4 x i32> %a, i32 %elem
%conv = sitofp i32 %vecext to double
ret double %conv
}