forked from OSchip/llvm-project
[PowerPC] Improve f32 to i32 bitcast code gen
The code gen for f32 to i32 bitcast is not currently the most efficient; this patch removes some unnecessary instructions that were being generated. Differential revision: https://reviews.llvm.org/D100782
This commit is contained in:
parent
f0e10cc91b
commit
db26cd30b6
|
@ -1816,8 +1816,7 @@ let PPC970_Single = 1, AddedComplexity = 400 in {
|
|||
|
||||
// Output dag used to bitcast f32 to i32 and f64 to i64
|
||||
def Bitcast {
|
||||
dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI (XSCVDPSPN $A),
|
||||
(XSCVDPSPN $A), 3), sub_64)));
|
||||
dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XSCVDPSPN $A), sub_64)));
|
||||
dag DblToLong = (i64 (MFVSRD $A));
|
||||
}
|
||||
|
||||
|
@ -2212,7 +2211,7 @@ def VectorExtractions {
|
|||
}
|
||||
|
||||
def AlignValues {
|
||||
dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
|
||||
dag F32_TO_BE_WORD1 = (v4f32 (XSCVDPSPN $B));
|
||||
dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64);
|
||||
}
|
||||
|
||||
|
@ -2817,6 +2816,10 @@ defm : ScalToVecWPermute<
|
|||
v4i32, FltToUIntLoad.A,
|
||||
(XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>;
|
||||
def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
|
||||
(f32 (fpround f64:$A)), (f32 (fpround f64:$A)))),
|
||||
(v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$A), sub_64), 0))>;
|
||||
|
||||
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
|
||||
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
|
||||
def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
|
||||
|
@ -4135,6 +4138,19 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
|
|||
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
|
||||
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
|
||||
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
|
||||
|
||||
// Scalar stores of i8
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), ForceXForm:$dst),
|
||||
(STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), ForceXForm:$dst)>;
|
||||
|
@ -4366,6 +4382,19 @@ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
|
|||
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
|
||||
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
|
||||
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
|
||||
(v4f32 (XXINSERTW v4f32:$A,
|
||||
(SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
|
||||
|
||||
def : Pat<(v8i16 (PPCld_vec_be ForceXForm:$src)),
|
||||
(COPY_TO_REGCLASS (LXVH8X ForceXForm:$src), VRRC)>;
|
||||
def : Pat<(PPCst_vec_be v8i16:$rS, ForceXForm:$dst),
|
||||
|
|
|
@ -743,14 +743,12 @@ define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
|
@ -762,14 +760,12 @@ define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 4
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 4
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
|
@ -781,14 +777,12 @@ define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
|
@ -800,14 +794,12 @@ define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 12
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 12
|
||||
; CHECK-32-NEXT: blr
|
||||
entry:
|
||||
|
|
|
@ -262,8 +262,8 @@ entry:
|
|||
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
||||
; CHECK-64-LABEL: testFloat1:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 4, 1, -16
|
||||
; CHECK-64-DAG: rlwinm 3, 4, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 4, 1, -16
|
||||
; CHECK-64-NEXT: stxv 34, -16(1)
|
||||
; CHECK-64-NEXT: stfsx 1, 4, 3
|
||||
; CHECK-64-NEXT: lxv 34, -16(1)
|
||||
|
@ -281,17 +281,15 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
|||
; CHECK-64-P10-LABEL: testFloat1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-P10-NEXT: extsw 3, 4
|
||||
; CHECK-64-P10-NEXT: slwi 3, 3, 2
|
||||
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-P10-NEXT: mffprwz 4, 0
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 3, 4
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: mffprwz 3, 0
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloat1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-P10-NEXT: mffprwz 3, 0
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
|
@ -304,8 +302,8 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
|||
; CHECK-64-LABEL: testFloat2:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: lwz 6, 0(3)
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stwx 6, 7, 4
|
||||
; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
|
||||
|
@ -367,8 +365,8 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
|||
; CHECK-64-LABEL: testFloat3:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: lis 6, 1
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: lwzx 6, 3, 6
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stwx 6, 7, 4
|
||||
|
@ -440,7 +438,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-64-LABEL: testFloatImm1:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-NEXT: blr
|
||||
|
@ -448,7 +445,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-32-LABEL: testFloatImm1:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
|
@ -456,7 +452,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-64-P10-LABEL: testFloatImm1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
|
@ -464,7 +459,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-32-P10-LABEL: testFloatImm1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
|
@ -479,11 +473,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
|
|||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: lfs 0, 0(3)
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-NEXT: lfs 0, 4(3)
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
|
@ -491,11 +483,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
|
|||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: lfs 0, 0(3)
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-NEXT: lfs 0, 4(3)
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
|
@ -533,11 +523,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
|
|||
; CHECK-64-NEXT: li 4, 1
|
||||
; CHECK-64-NEXT: rldic 4, 4, 38, 25
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-64-NEXT: lfsx 0, 3, 4
|
||||
; CHECK-64-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-64-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-64-NEXT: blr
|
||||
;
|
||||
|
@ -546,11 +534,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
|
|||
; CHECK-32-NEXT: lis 4, 4
|
||||
; CHECK-32-NEXT: lfsx 0, 3, 4
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 0
|
||||
; CHECK-32-NEXT: lfs 0, 0(3)
|
||||
; CHECK-32-NEXT: xscvdpspn 0, 0
|
||||
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-32-NEXT: xxinsertw 34, 0, 8
|
||||
; CHECK-32-NEXT: blr
|
||||
;
|
||||
|
@ -589,7 +575,7 @@ entry:
|
|||
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
|
||||
; CHECK-64-LABEL: testDouble1:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28
|
||||
; CHECK-64: rlwinm 3, 4, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 4, 1, -16
|
||||
; CHECK-64-NEXT: stxv 34, -16(1)
|
||||
; CHECK-64-NEXT: stfdx 1, 4, 3
|
||||
|
@ -615,8 +601,8 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
|
|||
;
|
||||
; CHECK-32-P10-LABEL: testDouble1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: addi 4, 1, -16
|
||||
; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
|
||||
; CHECK-32-P10-DAG: addi 4, 1, -16
|
||||
; CHECK-32-P10-DAG: rlwinm 3, 5, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stxv 34, -16(1)
|
||||
; CHECK-32-P10-NEXT: stfdx 1, 4, 3
|
||||
; CHECK-32-P10-NEXT: lxv 34, -16(1)
|
||||
|
@ -630,8 +616,8 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
|
|||
; CHECK-64-LABEL: testDouble2:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: ld 6, 0(3)
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stdx 6, 7, 4
|
||||
; CHECK-64-NEXT: li 4, 1
|
||||
|
@ -675,8 +661,8 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
|
|||
; CHECK-32-P10-LABEL: testDouble2:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lfd 0, 0(3)
|
||||
; CHECK-32-P10-NEXT: addi 6, 1, -32
|
||||
; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-32-P10-DAG: addi 6, 1, -32
|
||||
; CHECK-32-P10-DAG: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stxv 34, -32(1)
|
||||
; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
|
||||
; CHECK-32-P10-NEXT: stfdx 0, 6, 4
|
||||
|
@ -702,8 +688,8 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
|
|||
; CHECK-64-LABEL: testDouble3:
|
||||
; CHECK-64: # %bb.0: # %entry
|
||||
; CHECK-64-NEXT: lis 6, 1
|
||||
; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-NEXT: addi 7, 1, -32
|
||||
; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
|
||||
; CHECK-64-DAG: addi 7, 1, -32
|
||||
; CHECK-64-NEXT: ldx 6, 3, 6
|
||||
; CHECK-64-NEXT: stxv 34, -32(1)
|
||||
; CHECK-64-NEXT: stdx 6, 7, 4
|
||||
|
|
|
@ -10,8 +10,8 @@ entry:
|
|||
; CHECK-P7: stfs 1,
|
||||
; CHECK-P7: lwa 3,
|
||||
; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
|
||||
; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
|
||||
; CHECK: mffprwz 3, [[SHIFTREG]]
|
||||
; CHECK-NOT: xxsldwi
|
||||
; CHECK: mffprwz 3, [[CONVREG]]
|
||||
}
|
||||
|
||||
define i64 @f64toi64(double %a) {
|
||||
|
@ -50,8 +50,8 @@ entry:
|
|||
; CHECK-P7: stfs 1,
|
||||
; CHECK-P7: lwz 3,
|
||||
; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
|
||||
; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
|
||||
; CHECK: mffprwz 3, [[SHIFTREG]]
|
||||
; CHECK-NOT: xxsldwi
|
||||
; CHECK: mffprwz 3, [[CONVREG]]
|
||||
}
|
||||
|
||||
define i64 @f64toi64u(double %a) {
|
||||
|
|
|
@ -506,11 +506,9 @@ define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
entry:
|
||||
; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
|
||||
; CHECK: xscvdpspn 0, 1
|
||||
; CHECK: xxsldwi 0, 0, 0, 3
|
||||
; CHECK: xxinsertw 34, 0, 12
|
||||
; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
|
||||
; CHECK-BE: xscvdpspn 0, 1
|
||||
; CHECK-BE: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-BE: xxinsertw 34, 0, 0
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 0
|
||||
ret <4 x float> %vecins
|
||||
|
@ -520,11 +518,9 @@ define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
entry:
|
||||
; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
|
||||
; CHECK: xscvdpspn 0, 1
|
||||
; CHECK: xxsldwi 0, 0, 0, 3
|
||||
; CHECK: xxinsertw 34, 0, 8
|
||||
; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
|
||||
; CHECK-BE: xscvdpspn 0, 1
|
||||
; CHECK-BE: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-BE: xxinsertw 34, 0, 4
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 1
|
||||
ret <4 x float> %vecins
|
||||
|
@ -534,11 +530,9 @@ define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
entry:
|
||||
; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
|
||||
; CHECK: xscvdpspn 0, 1
|
||||
; CHECK: xxsldwi 0, 0, 0, 3
|
||||
; CHECK: xxinsertw 34, 0, 4
|
||||
; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
|
||||
; CHECK-BE: xscvdpspn 0, 1
|
||||
; CHECK-BE: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-BE: xxinsertw 34, 0, 8
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 2
|
||||
ret <4 x float> %vecins
|
||||
|
@ -548,11 +542,9 @@ define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b
|
|||
entry:
|
||||
; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
|
||||
; CHECK: xscvdpspn 0, 1
|
||||
; CHECK: xxsldwi 0, 0, 0, 3
|
||||
; CHECK: xxinsertw 34, 0, 0
|
||||
; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
|
||||
; CHECK-BE: xscvdpspn 0, 1
|
||||
; CHECK-BE: xxsldwi 0, 0, 0, 3
|
||||
; CHECK-BE: xxinsertw 34, 0, 12
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 3
|
||||
ret <4 x float> %vecins
|
||||
|
|
|
@ -216,7 +216,6 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
|
|||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: lfs f0, 0(r3)
|
||||
; P9LE-NEXT: xscvdpspn vs0, f0
|
||||
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; P9LE-NEXT: xxinsertw v2, vs0, 12
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
|
@ -224,7 +223,6 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
|
|||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: lfs f0, 0(r3)
|
||||
; P9BE-NEXT: xscvdpspn vs0, f0
|
||||
; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; P9BE-NEXT: xxinsertw v2, vs0, 0
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
|
|
|
@ -200,21 +200,19 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
|||
; CHECK-LABEL: testFloat1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-NEXT: extsw r3, r6
|
||||
; CHECK-NEXT: slwi r3, r3, 2
|
||||
; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-NEXT: mffprwz r4, f0
|
||||
; CHECK-NEXT: vinswrx v2, r3, r4
|
||||
; CHECK-NEXT: extsw r4, r6
|
||||
; CHECK-NEXT: slwi r4, r4, 2
|
||||
; CHECK-NEXT: mffprwz r3, f0
|
||||
; CHECK-NEXT: vinswrx v2, r4, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: testFloat1:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-BE-NEXT: extsw r3, r6
|
||||
; CHECK-BE-NEXT: slwi r3, r3, 2
|
||||
; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-BE-NEXT: mffprwz r4, f0
|
||||
; CHECK-BE-NEXT: vinswlx v2, r3, r4
|
||||
; CHECK-BE-NEXT: extsw r4, r6
|
||||
; CHECK-BE-NEXT: slwi r4, r4, 2
|
||||
; CHECK-BE-NEXT: mffprwz r3, f0
|
||||
; CHECK-BE-NEXT: vinswlx v2, r4, r3
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: testFloat1:
|
||||
|
@ -346,7 +344,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-LABEL: testFloatImm1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-NEXT: xxinsertw v2, vs0, 12
|
||||
; CHECK-NEXT: xxinsertw v2, vs0, 4
|
||||
; CHECK-NEXT: blr
|
||||
|
@ -354,7 +351,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-BE-LABEL: testFloatImm1:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-BE-NEXT: xxinsertw v2, vs0, 0
|
||||
; CHECK-BE-NEXT: xxinsertw v2, vs0, 8
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -362,7 +358,6 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
|
|||
; CHECK-P9-LABEL: testFloatImm1:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
|
||||
; CHECK-P9-NEXT: blr
|
||||
|
@ -393,11 +388,9 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
|
|||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: lfs f0, 0(r5)
|
||||
; CHECK-P9-NEXT: xscvdpspn vs0, f0
|
||||
; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
|
||||
; CHECK-P9-NEXT: lfs f0, 4(r5)
|
||||
; CHECK-P9-NEXT: xscvdpspn vs0, f0
|
||||
; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
|
||||
; CHECK-P9-NEXT: blr
|
||||
entry:
|
||||
|
@ -439,11 +432,9 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
|
|||
; CHECK-P9-NEXT: li r3, 1
|
||||
; CHECK-P9-NEXT: rldic r3, r3, 38, 25
|
||||
; CHECK-P9-NEXT: xscvdpspn vs0, f0
|
||||
; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
|
||||
; CHECK-P9-NEXT: lfsx f0, r5, r3
|
||||
; CHECK-P9-NEXT: xscvdpspn vs0, f0
|
||||
; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
|
||||
; CHECK-P9-NEXT: blr
|
||||
entry:
|
||||
|
@ -738,3 +729,26 @@ entry:
|
|||
ret <2 x double> %vecins
|
||||
}
|
||||
|
||||
define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: testInsertDoubleToFloat:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xscvdpsp f0, f1
|
||||
; CHECK-NEXT: xxinsertw v2, vs0, 8
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: testInsertDoubleToFloat:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xscvdpsp f0, f1
|
||||
; CHECK-BE-NEXT: xxinsertw v2, vs0, 4
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: testInsertDoubleToFloat:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: xscvdpsp f0, f1
|
||||
; CHECK-P9-NEXT: xxinsertw v2, vs0, 4
|
||||
; CHECK-P9-NEXT: blr
|
||||
entry:
|
||||
%conv = fptrunc double %b to float
|
||||
%vecins = insertelement <4 x float> %a, float %conv, i32 1
|
||||
ret <4 x float> %vecins
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue