forked from OSchip/llvm-project
[PowerPC] Fix 32bit vector insert instructions for ISA3.1
The platform-independent ISD::INSERT_VECTOR_ELT takes an element index, but the vins* instructions take a byte index. Update the 32-bit td patterns for vector insert to handle the element index accordingly. Since vector insert with a non-constant index is supported in ISA3.1, there is no need to use the platform-specific ISD node, PPCISD::VECINSERT. Update the td patterns to use ISD::INSERT_VECTOR_ELT directly instead. Reviewed By: nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D113802
This commit is contained in:
parent
1ca00ecfb8
commit
f50c6c1718
|
@ -1247,9 +1247,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
}
|
||||
|
||||
if (Subtarget.hasP9Altivec()) {
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
|
||||
|
||||
if (Subtarget.isISA3_1()) {
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
} else {
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
|
||||
}
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
|
||||
|
@ -1258,9 +1265,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
|
||||
}
|
||||
|
||||
if (Subtarget.isISA3_1())
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget.pairedVectorMemops()) {
|
||||
|
@ -10752,7 +10756,6 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
|||
SDLoc dl(Op);
|
||||
SDValue V1 = Op.getOperand(0);
|
||||
SDValue V2 = Op.getOperand(1);
|
||||
SDValue V3 = Op.getOperand(2);
|
||||
|
||||
if (VT == MVT::v2f64 && C)
|
||||
return Op;
|
||||
|
@ -10761,18 +10764,10 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
|||
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
|
||||
return SDValue();
|
||||
// On P10, we have legal lowering for constant and variable indices for
|
||||
// integer vectors.
|
||||
// all vectors.
|
||||
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
|
||||
VT == MVT::v2i64)
|
||||
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
|
||||
// For f32 and f64 vectors, we have legal lowering for variable indices.
|
||||
// For f32 we also have legal lowering when the element is loaded from
|
||||
// memory.
|
||||
if (VT == MVT::v4f32 || VT == MVT::v2f64) {
|
||||
if (!C || (VT == MVT::v4f32 && isa<LoadSDNode>(V2)))
|
||||
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
|
||||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
|
||||
return Op;
|
||||
}
|
||||
}
|
||||
|
||||
// Before P10, we have legal lowering for constant indices but not for
|
||||
|
|
|
@ -29,9 +29,6 @@ def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
|
|||
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
|
||||
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
|
||||
]>;
|
||||
def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ISA 3.1 specific PPCISD nodes.
|
||||
|
@ -45,7 +42,6 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
|
|||
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
|
||||
[]>;
|
||||
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
|
||||
def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -2797,135 +2793,128 @@ let Predicates = [PrefixInstrs] in {
|
|||
}
|
||||
|
||||
def InsertEltShift {
|
||||
dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
|
||||
dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32));
|
||||
dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
|
||||
dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
|
||||
dag Left1 = (RLWINM $rB, 1, 0, 30);
|
||||
dag Left2 = (RLWINM $rB, 2, 0, 29);
|
||||
dag Left3 = (RLWINM8 $rB, 3, 0, 28);
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
|
||||
// Indexed vector insert element
|
||||
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
|
||||
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
|
||||
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSBRX $vDi, InsertEltShift.Sub32, $rA)>;
|
||||
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
|
||||
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
|
||||
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
|
||||
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
|
||||
(VINSDRX $vDi, InsertEltShift.Left3, $rA)>;
|
||||
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
|
||||
(VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
|
||||
(VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
|
||||
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
|
||||
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
|
||||
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
|
||||
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
|
||||
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
|
||||
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
|
||||
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
|
||||
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
|
||||
|
||||
// Immediate vector insert element
|
||||
foreach i = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
|
||||
(VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
|
||||
let AddedComplexity = 400 in {
|
||||
// Immediate vector insert element
|
||||
foreach Idx = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
|
||||
(VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
|
||||
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
|
||||
(VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
|
||||
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
|
||||
}
|
||||
foreach i = [0, 1] in
|
||||
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
|
||||
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
|
||||
}
|
||||
foreach i = [0, 1] in
|
||||
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
|
||||
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
|
||||
// Indexed vector insert element
|
||||
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
|
||||
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSBLX $vDi, $rB, $rA)>;
|
||||
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSHLX $vDi, $rB, $rA)>;
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, $rA)>;
|
||||
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSHLX $vDi, InsertEltShift.Left1, $rA)>;
|
||||
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Left2, $rA)>;
|
||||
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
|
||||
i32:$rB)),
|
||||
(VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
|
||||
|
||||
// Immediate vector insert element
|
||||
foreach i = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), $rA)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
|
||||
(i32 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
|
||||
}
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
|
||||
(VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
|
||||
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
|
||||
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
|
||||
def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)),
|
||||
(VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>;
|
||||
}
|
||||
|
||||
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
|
||||
// Indexed vector insert element
|
||||
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
|
||||
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
|
||||
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSBLX $vDi, InsertEltShift.Sub32, $rA)>;
|
||||
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
|
||||
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
|
||||
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
|
||||
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
|
||||
(VINSDLX $vDi, InsertEltShift.Left3, $rA)>;
|
||||
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
|
||||
(VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
|
||||
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
|
||||
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
|
||||
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
|
||||
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
|
||||
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
|
||||
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
|
||||
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
|
||||
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
|
||||
|
||||
// Immediate vector insert element
|
||||
foreach i = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
|
||||
(VINSW $vDi, !mul(i, 4), $rA)>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
|
||||
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
|
||||
}
|
||||
foreach i = [0, 1] in
|
||||
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
|
||||
(VINSD $vDi, !mul(i, 8), $rA)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
|
||||
// Immediate vector insert element
|
||||
foreach Ty = [i32, i64] in {
|
||||
foreach Idx = [0, 1, 2, 3] in {
|
||||
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
|
||||
(VINSW $vDi, !mul(Idx, 4), $rA)>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
|
||||
(Ty Idx))),
|
||||
(VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
|
||||
(Ty Idx))),
|
||||
(VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
|
||||
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
|
||||
(Ty Idx))),
|
||||
(VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
|
||||
}
|
||||
}
|
||||
|
||||
foreach Idx = [0, 1] in
|
||||
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
|
||||
(VINSD $vDi, !mul(Idx, 8), $rA)>;
|
||||
}
|
||||
|
|
|
@ -69,7 +69,8 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
|
|||
;
|
||||
; CHECK-32-P10-LABEL: testHalf:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
|
||||
; CHECK-32-P10-NEXT: slwi 3, 6, 1
|
||||
; CHECK-32-P10-NEXT: vinshlx 2, 3, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i16
|
||||
|
@ -106,7 +107,8 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
|
|||
;
|
||||
; CHECK-32-P10-LABEL: testWord:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
|
||||
; CHECK-32-P10-NEXT: slwi 3, 6, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%conv = trunc i64 %b to i32
|
||||
|
@ -186,8 +188,10 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
|
|||
; CHECK-32-P10-LABEL: testDoubleword:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: add 5, 6, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: slwi 6, 5, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
|
||||
; CHECK-32-P10-NEXT: addi 3, 5, 1
|
||||
; CHECK-32-P10-NEXT: slwi 3, 3, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
|
@ -280,18 +284,17 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
|||
;
|
||||
; CHECK-64-P10-LABEL: testFloat1:
|
||||
; CHECK-64-P10: # %bb.0: # %entry
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-64-P10-NEXT: extsw 4, 4
|
||||
; CHECK-64-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-64-P10-NEXT: mffprwz 3, 0
|
||||
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-64-P10-NEXT: xscvdpspn 35, 1
|
||||
; CHECK-64-P10-NEXT: extsw 3, 4
|
||||
; CHECK-64-P10-NEXT: slwi 3, 3, 2
|
||||
; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3
|
||||
; CHECK-64-P10-NEXT: blr
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testFloat1:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
|
||||
; CHECK-32-P10-NEXT: mffprwz 3, 0
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-32-P10-NEXT: xscvdpspn 35, 1
|
||||
; CHECK-32-P10-NEXT: slwi 3, 4, 2
|
||||
; CHECK-32-P10-NEXT: vinswvlx 2, 3, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
|
||||
|
@ -347,8 +350,10 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
|||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lwz 6, 0(3)
|
||||
; CHECK-32-P10-NEXT: lwz 3, 1(3)
|
||||
; CHECK-32-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: slwi 4, 5, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%0 = bitcast i8* %b to float*
|
||||
|
@ -415,10 +420,12 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
|
|||
; CHECK-32-P10-LABEL: testFloat3:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: lis 6, 1
|
||||
; CHECK-32-P10-NEXT: slwi 4, 4, 2
|
||||
; CHECK-32-P10-NEXT: lwzx 6, 3, 6
|
||||
; CHECK-32-P10-NEXT: lwz 3, 0(3)
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
|
||||
; CHECK-32-P10-NEXT: slwi 4, 5, 2
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
|
||||
; CHECK-32-P10-NEXT: blr
|
||||
entry:
|
||||
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
|
||||
|
|
|
@ -199,20 +199,18 @@ entry:
|
|||
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
|
||||
; CHECK-LABEL: testFloat1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-NEXT: extsw r4, r6
|
||||
; CHECK-NEXT: slwi r4, r4, 2
|
||||
; CHECK-NEXT: mffprwz r3, f0
|
||||
; CHECK-NEXT: vinswrx v2, r4, r3
|
||||
; CHECK-NEXT: xscvdpspn v3, f1
|
||||
; CHECK-NEXT: extsw r3, r6
|
||||
; CHECK-NEXT: slwi r3, r3, 2
|
||||
; CHECK-NEXT: vinswvrx v2, r3, v3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: testFloat1:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xscvdpspn vs0, f1
|
||||
; CHECK-BE-NEXT: extsw r4, r6
|
||||
; CHECK-BE-NEXT: slwi r4, r4, 2
|
||||
; CHECK-BE-NEXT: mffprwz r3, f0
|
||||
; CHECK-BE-NEXT: vinswlx v2, r4, r3
|
||||
; CHECK-BE-NEXT: xscvdpspn v3, f1
|
||||
; CHECK-BE-NEXT: extsw r3, r6
|
||||
; CHECK-BE-NEXT: slwi r3, r3, 2
|
||||
; CHECK-BE-NEXT: vinswvlx v2, r3, v3
|
||||
; CHECK-BE-NEXT: blr
|
||||
;
|
||||
; CHECK-P9-LABEL: testFloat1:
|
||||
|
|
Loading…
Reference in New Issue