diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 90d2cc6f290b..6f5918d23fb9 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -226,6 +226,10 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VNOR: return "MipsISD::VNOR"; case MipsISD::VSHF: return "MipsISD::VSHF"; case MipsISD::SHF: return "MipsISD::SHF"; + case MipsISD::ILVEV: return "MipsISD::ILVEV"; + case MipsISD::ILVOD: return "MipsISD::ILVOD"; + case MipsISD::ILVL: return "MipsISD::ILVL"; + case MipsISD::ILVR: return "MipsISD::ILVR"; default: return NULL; } } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 9e9e9959183b..2fece8252ef1 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -175,6 +175,10 @@ namespace llvm { // Vector Shuffle with mask as an operand VSHF, // Generic shuffle SHF, // 4-element set shuffle. 
+ ILVEV, // Interleave even elements + ILVOD, // Interleave odd elements + ILVL, // Interleave left elements + ILVR, // Interleave right elements // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index 5592ac5690b7..a9c421edc902 100644 --- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -25,6 +25,8 @@ def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; +def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; @@ -42,6 +44,10 @@ def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>; def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>; +def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>; +def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>; +def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>; +def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -1833,25 +1839,25 @@ class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w, MSA128W, class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d, MSA128D, MSA128W, MSA128W>; -class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", int_mips_ilvev_b, MSA128B>; -class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", int_mips_ilvev_h, MSA128H>; -class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", int_mips_ilvev_w, MSA128W>; -class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", int_mips_ilvev_d, MSA128D>; +class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128B>; +class 
ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128H>; +class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128W>; +class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128D>; -class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", int_mips_ilvl_b, MSA128B>; -class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", int_mips_ilvl_h, MSA128H>; -class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", int_mips_ilvl_w, MSA128W>; -class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", int_mips_ilvl_d, MSA128D>; +class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128B>; +class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128H>; +class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128W>; +class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128D>; -class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", int_mips_ilvod_b, MSA128B>; -class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", int_mips_ilvod_h, MSA128H>; -class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", int_mips_ilvod_w, MSA128W>; -class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", int_mips_ilvod_d, MSA128D>; +class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128B>; +class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128H>; +class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", MipsILVOD, MSA128W>; +class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128D>; -class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", int_mips_ilvr_b, MSA128B>; -class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", int_mips_ilvr_h, MSA128H>; -class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", int_mips_ilvr_w, MSA128W>; -class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", int_mips_ilvr_d, MSA128D>; +class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128B>; +class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128H>; +class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128W>; +class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128D>; class INSERT_B_DESC : 
MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8, MSA128B, GPR32>; diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index b79e5321b696..4710e6a5a605 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1346,6 +1346,30 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsub_w: case Intrinsic::mips_fsub_d: return lowerMSABinaryIntr(Op, DAG, ISD::FSUB); + case Intrinsic::mips_ilvev_b: + case Intrinsic::mips_ilvev_h: + case Intrinsic::mips_ilvev_w: + case Intrinsic::mips_ilvev_d: + return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvl_b: + case Intrinsic::mips_ilvl_h: + case Intrinsic::mips_ilvl_w: + case Intrinsic::mips_ilvl_d: + return DAG.getNode(MipsISD::ILVL, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvod_b: + case Intrinsic::mips_ilvod_h: + case Intrinsic::mips_ilvod_w: + case Intrinsic::mips_ilvod_d: + return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_ilvr_b: + case Intrinsic::mips_ilvr_h: + case Intrinsic::mips_ilvr_w: + case Intrinsic::mips_ilvr_d: + return DAG.getNode(MipsISD::ILVR, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_insert_b: case Intrinsic::mips_insert_h: case Intrinsic::mips_insert_w: @@ -1806,6 +1830,127 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); } +// Lower VECTOR_SHUFFLE into ILVEV (if possible). +// +// ILVEV interleaves the even elements from each vector. +// +// It is possible to lower into ILVEV when the mask takes the form: +// <0, n, 2, n+2, 4, n+4, ...> +// where n is the number of elements in the vector. 
+//
+// Undef (-1) mask entries act as wildcards. Returns an empty SDValue when the
+// mask does not fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0 && "Expected an even number of indices");
+  int WsIdx = 0;                            // Expected Indices[i].
+  int WtIdx = ResTy.getVectorNumElements(); // Expected Indices[i + 1].
+
+  for (unsigned i = 0, e = Indices.size(); i < e; i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i + 1] != -1 && Indices[i + 1] != WtIdx)
+      return SDValue();
+    WsIdx += 2;
+    WtIdx += 2;
+  }
+
+  return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVOD (if possible).
+//
+// ILVOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into ILVOD when the mask takes the form:
+//   <1, n+1, 3, n+3, 5, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// Undef (-1) mask entries act as wildcards. Returns an empty SDValue when the
+// mask does not fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0 && "Expected an even number of indices");
+  int WsIdx = 1;                                // Expected Indices[i].
+  int WtIdx = ResTy.getVectorNumElements() + 1; // Expected Indices[i + 1].
+
+  for (unsigned i = 0, e = Indices.size(); i < e; i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i + 1] != -1 && Indices[i + 1] != WtIdx)
+      return SDValue();
+    WsIdx += 2;
+    WtIdx += 2;
+  }
+
+  return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
+//
+// ILVL interleaves consecutive elements from the left half of each vector.
+//
+// It is possible to lower into ILVL when the mask takes the form:
+//   <0, n, 1, n+1, 2, n+2, ...>
+// where n is the number of elements in the vector.
+//
+// Undef (-1) mask entries are wildcards; returns SDValue() on a mismatch.
+// NOTE(review): 'left' is treated as elements 0..n/2-1; confirm against ISA.
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0 && "Expected an even number of indices");
+  int WsIdx = 0;                            // Expected Indices[i].
+  int WtIdx = ResTy.getVectorNumElements(); // Expected Indices[i + 1].
+
+  for (unsigned i = 0, e = Indices.size(); i < e; i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i + 1] != -1 && Indices[i + 1] != WtIdx)
+      return SDValue();
+    ++WsIdx;
+    ++WtIdx;
+  }
+
+  return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
+//
+// ILVR interleaves consecutive elements from the right half of each vector.
+//
+// It is possible to lower into ILVR when the mask takes the form:
+//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+//
+// Undef (-1) mask entries are wildcards; returns SDValue() on a mismatch.
+// NOTE(review): 'right' is treated as elements n/2..n-1; confirm against ISA.
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0 && "Expected an even number of indices");
+  unsigned NumElts = ResTy.getVectorNumElements();
+  int WsIdx = NumElts / 2;           // Expected Indices[i].
+  int WtIdx = NumElts + NumElts / 2; // Expected Indices[i + 1].
+
+  for (unsigned i = 0, e = Indices.size(); i < e; i += 2) {
+    if (Indices[i] != -1 && Indices[i] != WsIdx)
+      return SDValue();
+    if (Indices[i + 1] != -1 && Indices[i + 1] != WtIdx)
+      return SDValue();
+    ++WsIdx;
+    ++WtIdx;
+  }
+
+  return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
+                     Op->getOperand(1));
+}
+
 // Lower VECTOR_SHUFFLE into VSHF.
// // This mostly consists of converting the shuffle indices in Indices into a @@ -1874,6 +2019,18 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, Indices.push_back(Node->getMaskElt(i)); SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; + Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG); if (Result.getNode()) return Result; return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
diff --git a/llvm/test/CodeGen/Mips/msa/shuffle.ll b/llvm/test/CodeGen/Mips/msa/shuffle.ll
index 9854234c719c..a4c68600e8d2 100644
--- a/llvm/test/CodeGen/Mips/msa/shuffle.ll
+++ b/llvm/test/CodeGen/Mips/msa/shuffle.ll
@@ -354,3 +354,265 @@ define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
 }
 
 ; shf.d does not exist
+
+define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: ilvev_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvev_v16i8_0
+}
+
+define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: ilvev_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvev_v8i16_0
+}
+
+define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: ilvev_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvev_v4i32_0
+}
+
+define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: ilvev_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvev_v2i64_0
+}
+
+define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: ilvod_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvod_v16i8_0
+}
+
+define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: ilvod_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvod_v8i16_0
+}
+
+define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: ilvod_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvod_v4i32_0
+}
+
+define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: ilvod_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvod_v2i64_0
+}
+
+define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: ilvl_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvl_v16i8_0
+}
+
+define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: ilvl_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvl_v8i16_0
+}
+
+define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: ilvl_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvl_v4i32_0
+}
+
+define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: ilvl_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
+  ; ilvl.d and ilvev.d are equivalent for v2i64
+  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvl_v2i64_0
+}
+
+define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: ilvr_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvr_v16i8_0
+}
+
+define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: ilvr_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvr_v8i16_0
+}
+
+define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: ilvr_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvr_v4i32_0
+}
+
+define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: ilvr_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
+  ; ilvr.d and ilvod.d are equivalent for v2i64
+  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size ilvr_v2i64_0
+}