forked from OSchip/llvm-project
[PowerPC][NFC] Explicitly state types on FP SDAG patterns in anticipation of adding the f128 type
llvm-svn: 327319
This commit is contained in:
parent
467b50057a
commit
cd4f385795
|
@ -705,7 +705,7 @@ def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
|
|||
(vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
|
||||
def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
|
||||
"vspltw $vD, $vB, $UIMM", IIC_VecPerm,
|
||||
[(set v16i8:$vD,
|
||||
[(set v16i8:$vD,
|
||||
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
|
||||
|
@ -962,7 +962,7 @@ def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
|
|||
|
||||
def : Pat<(fmul v4f32:$vA, v4f32:$vB),
|
||||
(VMADDFP $vA, $vB,
|
||||
(v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
|
||||
(v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
|
||||
|
||||
// Fused multiply add and multiply sub for packed float. These are represented
|
||||
// separately from the real instructions above, for operations that must have
|
||||
|
@ -991,7 +991,7 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
|
|||
def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VSLW $vA, $vB))>;
|
||||
def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)),
|
||||
(v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
|
||||
(v1i128 (VSL (v16i8 (VSLO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>;
|
||||
def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
|
||||
(v16i8 (VSLB $vA, $vB))>;
|
||||
def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
|
||||
|
@ -999,7 +999,7 @@ def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
|
|||
def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VSLW $vA, $vB))>;
|
||||
def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)),
|
||||
(v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>;
|
||||
(v1i128 (VSL (v16i8 (VSLO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>;
|
||||
|
||||
def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
|
||||
(v16i8 (VSRB $vA, $vB))>;
|
||||
|
@ -1008,7 +1008,7 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
|
|||
def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VSRW $vA, $vB))>;
|
||||
def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)),
|
||||
(v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
|
||||
(v1i128 (VSR (v16i8 (VSRO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>;
|
||||
def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
|
||||
(v16i8 (VSRB $vA, $vB))>;
|
||||
def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
|
||||
|
@ -1016,7 +1016,7 @@ def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
|
|||
def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VSRW $vA, $vB))>;
|
||||
def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)),
|
||||
(v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>;
|
||||
(v1i128 (VSR (v16i8 (VSRO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>;
|
||||
|
||||
def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
|
||||
(v16i8 (VSRAB $vA, $vB))>;
|
||||
|
@ -1078,10 +1078,12 @@ def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
|
|||
// Vector merge
|
||||
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vmrgew $vD, $vA, $vB", IIC_VecFP,
|
||||
[(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
|
||||
[(set v16i8:$vD,
|
||||
(v16i8 (vmrgew_shuffle v16i8:$vA, v16i8:$vB)))]>;
|
||||
def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
||||
"vmrgow $vD, $vA, $vB", IIC_VecFP,
|
||||
[(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
|
||||
[(set v16i8:$vD,
|
||||
(v16i8 (vmrgow_shuffle v16i8:$vA, v16i8:$vB)))]>;
|
||||
|
||||
// Match vmrgew(x,x) and vmrgow(x,x)
|
||||
def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
|
||||
|
@ -1503,11 +1505,11 @@ def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
|
|||
[(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;
|
||||
|
||||
def : Pat<(v16i8:$vD (abs v16i8:$vA)),
|
||||
(v16i8 (VABSDUB $vA, (V_SET0B)))>;
|
||||
(v16i8 (VABSDUB $vA, (v16i8 (V_SET0B))))>;
|
||||
def : Pat<(v8i16:$vD (abs v8i16:$vA)),
|
||||
(v8i16 (VABSDUH $vA, (V_SET0H)))>;
|
||||
(v8i16 (VABSDUH $vA, (v8i16 (V_SET0H))))>;
|
||||
def : Pat<(v4i32:$vD (abs v4i32:$vA)),
|
||||
(v4i32 (VABSDUW $vA, (V_SET0)))>;
|
||||
(v4i32 (VABSDUW $vA, (v4i32 (V_SET0))))>;
|
||||
|
||||
def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))),
|
||||
(v16i8 (VABSDUB $vA, $vB))>;
|
||||
|
|
|
@ -1278,7 +1278,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
|||
|
||||
def : Pat<(f64 (extloadf32 xoaddr:$src)),
|
||||
(COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
|
||||
def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
|
||||
def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
|
||||
(f32 (XFLOADf32 xoaddr:$src))>;
|
||||
def : Pat<(f64 (fpextend f32:$src)),
|
||||
(COPY_TO_REGCLASS $src, VSFRC)>;
|
||||
|
@ -1432,24 +1432,34 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
|||
} // UseVSXReg = 1
|
||||
|
||||
let Predicates = [IsLittleEndian] in {
|
||||
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
|
||||
def : Pat<(f32 (PPCfcfids
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
|
||||
(f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
|
||||
(f32 (XSCVSXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
|
||||
def : Pat<(f32 (PPCfcfids
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
|
||||
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
|
||||
(f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
|
||||
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
|
||||
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
|
||||
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
|
||||
(f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsBigEndian] in {
|
||||
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
|
||||
def : Pat<(f32 (PPCfcfids
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
|
||||
(f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
|
||||
def : Pat<(f32 (PPCfcfids
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
|
||||
(f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
|
||||
def : Pat<(f32 (PPCfcfidus
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
|
||||
(f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
|
||||
def : Pat<(f32 (PPCfcfidus
|
||||
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
|
||||
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
|
||||
}
|
||||
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
|
||||
|
@ -1614,11 +1624,11 @@ def VectorExtractions {
|
|||
This is accomplished by inverting the bits of the index and AND-ing
|
||||
with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
|
||||
*/
|
||||
dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx));
|
||||
dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)));
|
||||
|
||||
// Number 2. above:
|
||||
// - Now that we set up the shift amount, we shift in the VMX register
|
||||
dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC);
|
||||
dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC));
|
||||
|
||||
// Number 3. above:
|
||||
// - The doubleword containing our element is moved to a GPR
|
||||
|
@ -1646,11 +1656,12 @@ def VectorExtractions {
|
|||
AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
|
||||
Of course, the shift is still by 8 bytes, so we must multiply by 2.
|
||||
*/
|
||||
dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62));
|
||||
dag LE_VHALF_PERM_VEC =
|
||||
(v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)));
|
||||
|
||||
// Number 2. above:
|
||||
// - Now that we set up the shift amount, we shift in the VMX register
|
||||
dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC);
|
||||
dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC));
|
||||
|
||||
// Number 3. above:
|
||||
// - The doubleword containing our element is moved to a GPR
|
||||
|
@ -1675,11 +1686,12 @@ def VectorExtractions {
|
|||
- For elements 0-1, we shift left by 8 since they're on the right
|
||||
- For elements 2-3, we need not shift
|
||||
*/
|
||||
dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61));
|
||||
dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)));
|
||||
|
||||
// Number 2. above:
|
||||
// - Now that we set up the shift amount, we shift in the VMX register
|
||||
dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC);
|
||||
dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC));
|
||||
|
||||
// Number 3. above:
|
||||
// - The doubleword containing our element is moved to a GPR
|
||||
|
@ -1704,11 +1716,12 @@ def VectorExtractions {
|
|||
- For element 0, we shift left by 8 since it's on the right
|
||||
- For element 1, we need not shift
|
||||
*/
|
||||
dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60));
|
||||
dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)));
|
||||
|
||||
// Number 2. above:
|
||||
// - Now that we set up the shift amount, we shift in the VMX register
|
||||
dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC);
|
||||
dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC));
|
||||
|
||||
// Number 3. above:
|
||||
// - The doubleword containing our element is moved to a GPR
|
||||
|
@ -1722,16 +1735,17 @@ def VectorExtractions {
|
|||
- Shift the vector to line up the desired element to BE Word 0
|
||||
- Convert 32-bit float to a 64-bit single precision float
|
||||
*/
|
||||
dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61));
|
||||
dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (XOR8 (LI8 3), $Idx), 2, 61)));
|
||||
dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
|
||||
dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
|
||||
|
||||
/* LE variable double
|
||||
Same as the LE doubleword except there is no move.
|
||||
*/
|
||||
dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
|
||||
(COPY_TO_REGCLASS $S, VRRC),
|
||||
LE_VDWORD_PERM_VEC);
|
||||
dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
|
||||
(v16i8 (COPY_TO_REGCLASS $S, VRRC)),
|
||||
LE_VDWORD_PERM_VEC));
|
||||
dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
|
||||
|
||||
/* BE variable byte
|
||||
|
@ -1741,8 +1755,8 @@ def VectorExtractions {
|
|||
- The order of elements after the move to GPR is reversed, so we invert
|
||||
the bits of the index prior to truncating to the range 0-7
|
||||
*/
|
||||
dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8));
|
||||
dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC);
|
||||
dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8)));
|
||||
dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
|
||||
dag BE_MV_VBYTE = (MFVSRD
|
||||
(EXTRACT_SUBREG
|
||||
(v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
|
||||
|
@ -1759,8 +1773,9 @@ def VectorExtractions {
|
|||
- The order of elements after the move to GPR is reversed, so we invert
|
||||
the bits of the index prior to truncating to the range 0-3
|
||||
*/
|
||||
dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62));
|
||||
dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC);
|
||||
dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (ANDIo8 $Idx, 4), 1, 62)));
|
||||
dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
|
||||
dag BE_MV_VHALF = (MFVSRD
|
||||
(EXTRACT_SUBREG
|
||||
(v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
|
||||
|
@ -1776,8 +1791,9 @@ def VectorExtractions {
|
|||
- The order of elements after the move to GPR is reversed, so we invert
|
||||
the bits of the index prior to truncating to the range 0-1
|
||||
*/
|
||||
dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61));
|
||||
dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC);
|
||||
dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (ANDIo8 $Idx, 2), 2, 61)));
|
||||
dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
|
||||
dag BE_MV_VWORD = (MFVSRD
|
||||
(EXTRACT_SUBREG
|
||||
(v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
|
||||
|
@ -1791,8 +1807,9 @@ def VectorExtractions {
|
|||
Same as the LE doubleword except we shift in the VMX register for opposite
|
||||
element indices.
|
||||
*/
|
||||
dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60));
|
||||
dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC);
|
||||
dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
|
||||
(RLDICR (ANDIo8 $Idx, 1), 3, 60)));
|
||||
dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
|
||||
dag BE_VARIABLE_DWORD =
|
||||
(MFVSRD (EXTRACT_SUBREG
|
||||
(v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
|
||||
|
@ -1802,16 +1819,16 @@ def VectorExtractions {
|
|||
- Shift the vector to line up the desired element to BE Word 0
|
||||
- Convert 32-bit float to a 64-bit single precision float
|
||||
*/
|
||||
dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61));
|
||||
dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61)));
|
||||
dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
|
||||
dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
|
||||
|
||||
/* BE variable double
|
||||
Same as the BE doubleword except there is no move.
|
||||
*/
|
||||
dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
|
||||
(COPY_TO_REGCLASS $S, VRRC),
|
||||
BE_VDWORD_PERM_VEC);
|
||||
dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
|
||||
(v16i8 (COPY_TO_REGCLASS $S, VRRC)),
|
||||
BE_VDWORD_PERM_VEC));
|
||||
dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
|
||||
}
|
||||
|
||||
|
@ -2282,7 +2299,7 @@ let Predicates = [HasDirectMove, HasVSX] in {
|
|||
// (convert to 32-bit fp single, shift right 1 word, move to GPR)
|
||||
def : Pat<(i32 (bitconvert f32:$S)),
|
||||
(i32 (MFVSRWZ (EXTRACT_SUBREG
|
||||
(XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3),
|
||||
(XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
|
||||
sub_64)))>;
|
||||
// bitconvert i32 -> f32
|
||||
// (move to FPR, shift left 1 word, convert to 64-bit fp single)
|
||||
|
@ -2728,40 +2745,40 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
} // mayStore
|
||||
|
||||
let Predicates = [IsLittleEndian] in {
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsBigEndian] in {
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
|
||||
def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
|
||||
def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
|
||||
def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
|
||||
def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
|
||||
}
|
||||
|
||||
|
@ -2774,21 +2791,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
|
||||
// Patterns for which instructions from ISA 3.0 are a better match
|
||||
let Predicates = [IsLittleEndian, HasP9Vector] in {
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
|
||||
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
|
||||
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
|
||||
|
@ -2809,21 +2826,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
} // IsLittleEndian, HasP9Vector
|
||||
|
||||
let Predicates = [IsBigEndian, HasP9Vector] in {
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
|
||||
def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
|
||||
def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
|
||||
def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
|
||||
def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
|
||||
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
|
||||
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
|
||||
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
|
||||
|
@ -2883,7 +2900,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
(v4i32 (LXVWSX xoaddr:$src))>;
|
||||
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
|
||||
(v4f32 (LXVWSX xoaddr:$src))>;
|
||||
def : Pat<(v4f32 (scalar_to_vector (f32 (fpround (extloadf32 xoaddr:$src))))),
|
||||
def : Pat<(v4f32 (scalar_to_vector
|
||||
(f32 (fpround (f64 (extloadf32 xoaddr:$src)))))),
|
||||
(v4f32 (LXVWSX xoaddr:$src))>;
|
||||
|
||||
// Build vectors from i8 loads
|
||||
|
@ -2915,109 +2933,109 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
let Predicates = [IsBigEndian, HasP9Vector] in {
|
||||
// Scalar stores of i8
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
|
||||
(STXSIBXv $S, xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
|
||||
|
||||
// Scalar stores of i16
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
|
||||
(STXSIHXv $S, xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
|
||||
} // IsBigEndian, HasP9Vector
|
||||
|
||||
let Predicates = [IsLittleEndian, HasP9Vector] in {
|
||||
// Scalar stores of i8
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
|
||||
(STXSIBXv $S, xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
|
||||
(STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
|
||||
(STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
|
||||
|
||||
// Scalar stores of i16
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
|
||||
(STXSIHXv $S, xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
|
||||
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
|
||||
(STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
|
||||
(STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
|
||||
} // IsLittleEndian, HasP9Vector
|
||||
|
||||
|
||||
|
@@ -3043,7 +3061,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
}
|
||||
def : Pat<(f64 (extloadf32 ixaddr:$src)),
|
||||
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
|
||||
def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))),
|
||||
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
|
||||
(f32 (DFLOADf32 ixaddr:$src))>;
|
||||
} // end HasP9Vector, AddedComplexity
|
||||
|
||||
|
@@ -3149,10 +3167,10 @@ def FltToULongLoadP9 {
|
|||
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
|
||||
}
|
||||
def FltToLong {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
|
||||
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
|
||||
}
|
||||
def FltToULong {
|
||||
dag A = (i64 (PPCmfvsr (PPCfctiduz (fpextend f32:$A))));
|
||||
dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz (fpextend f32:$A)))));
|
||||
}
|
||||
def DblToInt {
|
||||
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
|
||||
|
@@ -3368,8 +3386,10 @@ let AddedComplexity = 400 in {
|
|||
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC))>;
|
||||
(VMRGOW
|
||||
(v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
|
||||
(v4i32
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
|
||||
|
@@ -3379,8 +3399,10 @@ let AddedComplexity = 400 in {
|
|||
def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
|
||||
(v2i64 (MTVSRDD $rB, $rA))>;
|
||||
def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
|
||||
(VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
|
||||
(VMRGOW
|
||||
(v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
|
||||
(v4i32
|
||||
(COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
|
||||
}
|
||||
// P9 Altivec instructions that can be used to build vectors.
|
||||
// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
|
||||
|
|
|
@@ -287,7 +287,9 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
|
|||
(sequence "F%u", 31, 14))>;
|
||||
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
|
||||
|
||||
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64], 128,
|
||||
def VRRC : RegisterClass<"PPC",
|
||||
[v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128],
|
||||
128,
|
||||
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
|
||||
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
|
||||
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
|
||||
|
|
Loading…
Reference in New Issue