[PowerPC] Fix 32bit vector insert instructions for ISA3.1

The platform independent ISD::INSERT_VECTOR_ELT take a element index,
but vins* instructions take a byte index. Update 32bit td patterns for
vector insert to handle the element index accordingly.

Since vector insert for non constant index are supported in
ISA3.1, there is no need to use platform specific ISD node,
PPCISD::VECINSERT.  Update td pattern to directly use
ISD::INSERT_VECTOR_ELT instead.

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D113802
This commit is contained in:
Lei Huang 2021-11-12 15:05:52 -06:00
parent 1ca00ecfb8
commit f50c6c1718
4 changed files with 122 additions and 133 deletions

View File

@ -1247,9 +1247,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
@ -1258,9 +1265,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
if (Subtarget.isISA3_1())
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
}
if (Subtarget.pairedVectorMemops()) {
@ -10752,7 +10756,6 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue V3 = Op.getOperand(2);
if (VT == MVT::v2f64 && C)
return Op;
@ -10761,18 +10764,10 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
// integer vectors.
// all vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64)
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
// For f32 and f64 vectors, we have legal lowering for variable indices.
// For f32 we also have legal lowering when the element is loaded from
// memory.
if (VT == MVT::v4f32 || VT == MVT::v2f64) {
if (!C || (VT == MVT::v4f32 && isa<LoadSDNode>(V2)))
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
return Op;
}
}
// Before P10, we have legal lowering for constant indices but not for

View File

@ -29,9 +29,6 @@ def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
@ -45,7 +42,6 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;
//===----------------------------------------------------------------------===//
@ -2797,135 +2793,128 @@ let Predicates = [PrefixInstrs] in {
}
def InsertEltShift {
dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32));
dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
dag Left1 = (RLWINM $rB, 1, 0, 30);
dag Left2 = (RLWINM $rB, 2, 0, 29);
dag Left3 = (RLWINM8 $rB, 3, 0, 28);
}
let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBRX $vDi, InsertEltShift.Sub32, $rA)>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
let AddedComplexity = 400 in {
// Immediate vector insert element
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)),
(VINSBLX $vDi, $rB, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
(VINSHLX $vDi, $rB, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
(VINSWLX $vDi, $rB, $rA)>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)),
(VINSHLX $vDi, InsertEltShift.Left1, $rA)>;
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)),
(VINSWLX $vDi, InsertEltShift.Left2, $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
(VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
i32:$rB)),
(VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
(VINSW $vDi, !mul(i, 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
(i32 i))),
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
}
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
(VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
(VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)),
(VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32, $rA)>;
def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
// Immediate vector insert element
foreach i = [0, 1, 2, 3] in {
def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
(VINSW $vDi, !mul(i, 4), $rA)>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
(VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
(VINSD $vDi, !mul(i, 8), $rA)>;
}
let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
// Immediate vector insert element
foreach Ty = [i32, i64] in {
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), $rA)>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
(Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
}
}
foreach Idx = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
(VINSD $vDi, !mul(Idx, 8), $rA)>;
}

View File

@ -69,7 +69,8 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
;
; CHECK-32-P10-LABEL: testHalf:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
; CHECK-32-P10-NEXT: slwi 3, 6, 1
; CHECK-32-P10-NEXT: vinshlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i16
@ -106,7 +107,8 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
;
; CHECK-32-P10-LABEL: testWord:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
; CHECK-32-P10-NEXT: slwi 3, 6, 2
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
@ -186,8 +188,10 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-32-P10-LABEL: testDoubleword:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: add 5, 6, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 6, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
; CHECK-32-P10-NEXT: addi 3, 5, 1
; CHECK-32-P10-NEXT: slwi 3, 3, 2
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
; CHECK-32-P10-NEXT: blr
entry:
@ -280,18 +284,17 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
;
; CHECK-64-P10-LABEL: testFloat1:
; CHECK-64-P10: # %bb.0: # %entry
; CHECK-64-P10-NEXT: xscvdpspn 0, 1
; CHECK-64-P10-NEXT: extsw 4, 4
; CHECK-64-P10-NEXT: slwi 4, 4, 2
; CHECK-64-P10-NEXT: mffprwz 3, 0
; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
; CHECK-64-P10-NEXT: xscvdpspn 35, 1
; CHECK-64-P10-NEXT: extsw 3, 4
; CHECK-64-P10-NEXT: slwi 3, 3, 2
; CHECK-64-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-64-P10-NEXT: blr
;
; CHECK-32-P10-LABEL: testFloat1:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: xscvdpspn 0, 1
; CHECK-32-P10-NEXT: mffprwz 3, 0
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: xscvdpspn 35, 1
; CHECK-32-P10-NEXT: slwi 3, 4, 2
; CHECK-32-P10-NEXT: vinswvlx 2, 3, 3
; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
@ -347,8 +350,10 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lwz 6, 0(3)
; CHECK-32-P10-NEXT: lwz 3, 1(3)
; CHECK-32-P10-NEXT: slwi 4, 4, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 4, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to float*
@ -415,10 +420,12 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-P10-LABEL: testFloat3:
; CHECK-32-P10: # %bb.0: # %entry
; CHECK-32-P10-NEXT: lis 6, 1
; CHECK-32-P10-NEXT: slwi 4, 4, 2
; CHECK-32-P10-NEXT: lwzx 6, 3, 6
; CHECK-32-P10-NEXT: lwz 3, 0(3)
; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
; CHECK-32-P10-NEXT: slwi 4, 5, 2
; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536

View File

@ -199,20 +199,18 @@ entry:
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-LABEL: testFloat1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn vs0, f1
; CHECK-NEXT: extsw r4, r6
; CHECK-NEXT: slwi r4, r4, 2
; CHECK-NEXT: mffprwz r3, f0
; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: xscvdpspn v3, f1
; CHECK-NEXT: extsw r3, r6
; CHECK-NEXT: slwi r3, r3, 2
; CHECK-NEXT: vinswvrx v2, r3, v3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xscvdpspn vs0, f1
; CHECK-BE-NEXT: extsw r4, r6
; CHECK-BE-NEXT: slwi r4, r4, 2
; CHECK-BE-NEXT: mffprwz r3, f0
; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: xscvdpspn v3, f1
; CHECK-BE-NEXT: extsw r3, r6
; CHECK-BE-NEXT: slwi r3, r3, 2
; CHECK-BE-NEXT: vinswvlx v2, r3, v3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat1: