[mips][msa] Added support for matching splati from normal IR (i.e. not intrinsics)

Updated some of the vshf since they (correctly) emit splati's now llvm-svn: 191511
2013-09-27 11:48:57 +00:00 · 2013-09-27 11:48:57 +00:00 · 7e51fe19d5
parent 3c42ce1516
commit 7e51fe19d5
10 changed files with 157 additions and 20 deletions
--- a/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@ -184,6 +184,15 @@ void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
    printOperand(MI, opNum, O);
 }

+void MipsInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+                                        raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (unsigned short int)(unsigned char)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
 void MipsInstPrinter::
 printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
  // Load/Store memory operands -- imm($reg)
--- a/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@ -93,6 +93,7 @@ public:
 private:
  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+  void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
  void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
  void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
--- a/llvm/lib/Target/Mips/MSA.txt
+++ b/llvm/lib/Target/Mips/MSA.txt
@ -28,3 +28,7 @@ ilvl.d, pckev.d:
 ilvr.d, ilvod.d, pckod.d:
        It is not possible to emit ilvr.d, or pckod.d since ilvod.d covers the
        same shuffle. ilvod.d will be emitted instead.
+
+splati.w:
+        It is not possible to emit splati.w since shf.w covers the same cases.
+        shf.w will be emitted instead.
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@ -104,6 +104,16 @@ bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
  return false;
 }

+bool MipsDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  llvm_unreachable("Unimplemented function.");
+  return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  llvm_unreachable("Unimplemented function.");
+  return false;
+}
+
 bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
  llvm_unreachable("Unimplemented function.");
  return false;
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@ -78,6 +78,10 @@ private:

  /// \brief Select constant vector splats.
  virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm1.
+  virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm2.
+  virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
  /// \brief Select constant vector splats whose value fits in a uimm3.
  virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
  /// \brief Select constant vector splats whose value fits in a uimm4.
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@ -77,6 +77,14 @@ def simm5 : Operand<i32>;

 def simm10 : Operand<i32>;

+def vsplat_uimm1 : Operand<vAny> {
+  let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm2 : Operand<vAny> {
+  let PrintMethod = "printUnsignedImm8";
+}
+
 def vsplat_uimm3 : Operand<vAny> {
  let PrintMethod = "printUnsignedImm";
 }
@ -222,6 +230,10 @@ def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1,
                                         "selectVSplatUimm3",
                                         [build_vector, bitconvert]>;

+def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1,
+                                         "selectVSplatUimm4",
+                                         [build_vector, bitconvert]>;
+
 def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1,
                                         "selectVSplatUimm5",
                                         [build_vector, bitconvert]>;
@ -234,6 +246,10 @@ def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1,
                                         "selectVSplatSimm5",
                                         [build_vector, bitconvert]>;

+def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1,
+                                          "selectVSplatUimm3",
+                                          [build_vector, bitconvert]>;
+
 def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1,
                                          "selectVSplatUimm4",
                                          [build_vector, bitconvert]>;
@ -246,6 +262,10 @@ def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1,
                                          "selectVSplatSimm5",
                                          [build_vector, bitconvert]>;

+def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1,
+                                          "selectVSplatUimm2",
+                                          [build_vector, bitconvert]>;
+
 def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1,
                                          "selectVSplatUimm5",
                                          [build_vector, bitconvert]>;
@ -254,6 +274,10 @@ def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1,
                                          "selectVSplatSimm5",
                                          [build_vector, bitconvert]>;

+def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1,
+                                          "selectVSplatUimm1",
+                                          [build_vector, bitconvert]>;
+
 def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1,
                                          "selectVSplatUimm5",
                                          [build_vector, bitconvert]>;
@ -1235,6 +1259,18 @@ class MSA_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
  InstrItinClass Itinerary = itin;
 }

+class MSA_ELM_SPLAT_DESC_BASE<string instr_asm, SplatComplexPattern SplatImm,
+                              RegisterClass RCWD,
+                              RegisterClass RCWS = RCWD,
+                              InstrItinClass itin = NoItinerary> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, SplatImm.OpClass:$u3);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$u3]");
+  list<dag> Pattern = [(set RCWD:$wd, (MipsVSHF SplatImm:$u3, RCWS:$ws,
+                                                RCWS:$ws))];
+  InstrItinClass Itinerary = itin;
+}
+
 class MSA_VEC_PSEUDO_BASE<SDPatternOperator OpNode, RegisterClass RCWD,
                          RegisterClass RCWS = RCWD,
                          RegisterClass RCWT = RCWD> :
@ -2172,14 +2208,14 @@ class SPLAT_W_DESC : MSA_3R_DESC_BASE<"splat.w", int_mips_splat_w, MSA128WOpnd,
 class SPLAT_D_DESC : MSA_3R_DESC_BASE<"splat.d", int_mips_splat_d, MSA128DOpnd,
                                      MSA128DOpnd, GPR32Opnd>;

-class SPLATI_B_DESC : MSA_BIT_B_DESC_BASE<"splati.b", int_mips_splati_b,
-                                          MSA128B>;
-class SPLATI_H_DESC : MSA_BIT_H_DESC_BASE<"splati.h", int_mips_splati_h,
-                                          MSA128H>;
-class SPLATI_W_DESC : MSA_BIT_W_DESC_BASE<"splati.w", int_mips_splati_w,
-                                          MSA128W>;
-class SPLATI_D_DESC : MSA_BIT_D_DESC_BASE<"splati.d", int_mips_splati_d,
-                                          MSA128D>;
+class SPLATI_B_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.b", vsplati8_uimm4,
+                                              MSA128B>;
+class SPLATI_H_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.h", vsplati16_uimm3,
+                                              MSA128H>;
+class SPLATI_W_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.w", vsplati32_uimm2,
+                                              MSA128W>;
+class SPLATI_D_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.d", vsplati64_uimm1,
+                                              MSA128D>;

 class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128BOpnd>;
 class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128HOpnd>;
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@ -450,6 +450,16 @@ selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
 }

 // Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 1);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, false, 2);
+}
+
 bool MipsSEDAGToDAGISel::
 selectVSplatUimm3(SDValue N, SDValue &Imm) const {
  return selectVSplatCommon(N, Imm, false, 3);
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@ -63,6 +63,10 @@ private:
  /// \brief Select constant vector splats whose value fits in a given integer.
  virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
                                  unsigned ImmBitSize) const;
+  /// \brief Select constant vector splats whose value fits in a uimm1.
+  virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value fits in a uimm2.
+  virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
  /// \brief Select constant vector splats whose value fits in a uimm3.
  virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
  /// \brief Select constant vector splats whose value fits in a uimm4.
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@ -1470,6 +1470,13 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
  case Intrinsic::mips_slli_d:
    return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+  case Intrinsic::mips_splati_b:
+  case Intrinsic::mips_splati_h:
+  case Intrinsic::mips_splati_w:
+  case Intrinsic::mips_splati_d:
+    return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+                       Op->getOperand(1));
  case Intrinsic::mips_sra_b:
  case Intrinsic::mips_sra_h:
  case Intrinsic::mips_sra_w:
--- a/llvm/test/CodeGen/Mips/msa/shuffle.ll
+++ b/llvm/test/CodeGen/Mips/msa/shuffle.ll
@ -21,8 +21,7 @@ define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

@ -69,8 +68,7 @@ define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
-  ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

@ -99,8 +97,7 @@ define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

@ -147,8 +144,7 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
-  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

@ -254,8 +250,7 @@ define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

@ -302,8 +297,7 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
-  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

@ -748,3 +742,61 @@ define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
  ret void
  ; CHECK: .size pckod_v2i64_0
 }
+
+define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: splati_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
+                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v16i8_0
+}
+
+define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: splati_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v8i16_0
+}
+
+define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: splati_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  ; shf.w and splati.w are equivalent
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v4i32_0
+}
+
+define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: splati_v2i64_0:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+  store <2 x i64> %2, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size splati_v2i64_0
+}