[Hexagon] Split HVX operations on vector pairs

Vector pairs are legal types, but not every operation can work on pairs. For those operations that are legal for single vectors, generate a concat of their results on pair halves. llvm-svn: 324350
2018-02-06 14:24:57 +00:00 · 2018-02-06 14:24:57 +00:00 · 88f11003a0
parent 7b52cf1d7f
commit 88f11003a0
7 changed files with 340 additions and 68 deletions
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@ -1275,12 +1275,8 @@ static bool isSExtFree(SDValue N) {

 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
-
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
-  if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS)))
-    return LowerHvxSetCC(Op, DAG);
-
  SDValue Cmp = Op.getOperand(2);
  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();

@ -2151,15 +2147,39 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
      // independent) handling of it would convert it to a load, which is
      // not always the optimal choice.
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
-      // Custom-lower SETCC for pairs. Expand it into a concat of SETCCs
-      // for individual vectors.
-      setOperationAction(ISD::SETCC,        T, Custom);

-      if (T == ByteW)
-        continue;
-      // Promote all shuffles and concats to operate on vectors of bytes.
-      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
-      setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
+      // Custom-lower these operations for pairs. Expand them into a concat
+      // of the corresponding operations on individual vectors.
+      setOperationAction(ISD::ANY_EXTEND,               T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND,              T, Custom);
+      setOperationAction(ISD::ZERO_EXTEND,              T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_INREG,        T, Custom);
+      setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  T, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+
+      setOperationAction(ISD::ADD,      T, Legal);
+      setOperationAction(ISD::SUB,      T, Legal);
+      setOperationAction(ISD::MUL,      T, Custom);
+      setOperationAction(ISD::MULHS,    T, Custom);
+      setOperationAction(ISD::MULHU,    T, Custom);
+      setOperationAction(ISD::AND,      T, Custom);
+      setOperationAction(ISD::OR,       T, Custom);
+      setOperationAction(ISD::XOR,      T, Custom);
+      setOperationAction(ISD::SETCC,    T, Custom);
+      setOperationAction(ISD::VSELECT,  T, Custom);
+      if (T != ByteW) {
+        setOperationAction(ISD::SRA,      T, Custom);
+        setOperationAction(ISD::SHL,      T, Custom);
+        setOperationAction(ISD::SRL,      T, Custom);
+
+        // Promote all shuffles and concats to operate on vectors of bytes.
+        setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
+        setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
+      }
+
+      MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
+      setOperationAction(ISD::SETCC, BoolV, Custom);
    }
  }

@ -2310,6 +2330,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
  case HexagonISD::P2D:           return "HexagonISD::P2D";
  case HexagonISD::V2Q:           return "HexagonISD::V2Q";
  case HexagonISD::Q2V:           return "HexagonISD::Q2V";
+  case HexagonISD::QCAT:          return "HexagonISD::QCAT";
  case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
  case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
  case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
@ -2593,7 +2614,16 @@ HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {

 SDValue
 HexagonTargetLowering::LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const {
-  return LowerSIGN_EXTEND(Op, DAG);
+  // Lower any-extends of boolean vectors to sign-extends, since they
+  // translate directly to Q2V. Zero-extending could also be done equally
+  // fast, but Q2V is used/recognized in more places.
+  // For all other vectors, use zero-extend.
+  MVT ResTy = ty(Op);
+  SDValue InpV = Op.getOperand(0);
+  MVT ElemTy = ty(InpV).getVectorElementType();
+  if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+    return LowerSIGN_EXTEND(Op, DAG);
+  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
 }

 SDValue
@ -3185,6 +3215,14 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
+
+  // Handle INLINEASM first.
+  if (Opc == ISD::INLINEASM)
+    return LowerINLINEASM(Op, DAG);
+
+  if (isHvxOperation(Op))
+    return LowerHvxOperation(Op, DAG);
+
  switch (Opc) {
    default:
 #ifndef NDEBUG
@ -3200,9 +3238,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
    case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
-    case ISD::ANY_EXTEND:           return LowerANY_EXTEND(Op, DAG);
-    case ISD::SIGN_EXTEND:          return LowerSIGN_EXTEND(Op, DAG);
-    case ISD::ZERO_EXTEND:          return LowerZERO_EXTEND(Op, DAG);
    case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
@ -3210,7 +3245,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
    case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
-      // Frame & Return address. Currently unimplemented.
    case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
    case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
@ -3224,23 +3258,11 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
    case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
-    case ISD::INLINEASM:            return LowerINLINEASM(Op, DAG);
    case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
    case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
-    case ISD::MUL:
-      if (Subtarget.useHVXOps())
-        return LowerHvxMul(Op, DAG);
-      break;
-    case ISD::MULHS:
-    case ISD::MULHU:
-      if (Subtarget.useHVXOps())
-        return LowerHvxMulh(Op, DAG);
-      break;
-    case ISD::ANY_EXTEND_VECTOR_INREG:
-      if (Subtarget.useHVXOps())
-        return LowerHvxExtend(Op, DAG);
      break;
  }
+
  return SDValue();
 }

--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@ -73,6 +73,7 @@ namespace HexagonISD {
                   // [*] The equivalence is defined as "Q <=> (V != 0)",
                   //     where the != operation compares bytes.
                   // Note: V != 0 is implemented as V >u 0.
+      QCAT,
      QTRUE,
      QFALSE,
      VZERO,
@ -408,9 +409,14 @@ namespace HexagonISD {
    SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;

+    SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
+
    std::pair<const TargetRegisterClass*, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
        const override;
+
+    bool isHvxOperation(SDValue Op) const;
+    SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
  };

 } // end namespace llvm
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@ -75,6 +75,10 @@ HexagonTargetLowering::VectorPair
 HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                               SelectionDAG &DAG) const {
  TypePair Tys = typeSplit(ty(Vec));
+  if (Vec.getOpcode() == HexagonISD::QCAT) {
+assert(0);
+    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
+  }
  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
 }

@ -799,7 +803,26 @@ HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned NumOp = Op.getNumOperands();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
-  (void)NumOp;
+
+  SDValue Op0 = Op.getOperand(0);
+
+  // If the operands are HVX types (i.e. not scalar predicates), then
+  // defer the concatenation, and create QCAT instead.
+  if (Subtarget.isHVXVectorType(ty(Op0), true)) {
+    if (NumOp == 2)
+      return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
+
+    ArrayRef<SDUse> U(Op.getNode()->ops());
+    SmallVector<SDValue,4> SV(U.begin(), U.end());
+    ArrayRef<SDValue> Ops(SV);
+
+    MVT HalfTy = typeSplit(VecTy).first;
+    SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+                             Ops.take_front(NumOp/2));
+    SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+                             Ops.take_back(NumOp/2));
+    return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
+  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
@ -889,7 +912,7 @@ HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
 SDValue
 HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
-  assert(ResTy.isVector());
+  assert(ResTy.isVector() && isHvxSingleTy(ResTy));
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

@ -1046,30 +1069,6 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  return T7;
 }

-SDValue
-HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
-  MVT ResTy = ty(Op);
-  MVT VecTy = ty(Op.getOperand(0));
-  assert(VecTy == ty(Op.getOperand(1)));
-  unsigned HwLen = Subtarget.getVectorLength();
-  const SDLoc &dl(Op);
-
-  SDValue Cmp = Op.getOperand(2);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
-
-  if (VecTy.getSizeInBits() == 16*HwLen) {
-    VectorPair P0 = opSplit(Op.getOperand(0), dl, DAG);
-    VectorPair P1 = opSplit(Op.getOperand(1), dl, DAG);
-    MVT HalfTy = typeSplit(ResTy).first;
-
-    SDValue V0 = DAG.getSetCC(dl, HalfTy, P0.first, P1.first, CC);
-    SDValue V1 = DAG.getSetCC(dl, HalfTy, P0.second, P1.second, CC);
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, V1, V0);
-  }
-
-  return SDValue();
-}
-
 SDValue
 HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  // Sign- and zero-extends are legal.
@ -1082,3 +1081,103 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
  return Op;
 }

+SDValue
+HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
+  assert(!Op.isMachineOpcode());
+  SmallVector<SDValue,2> OpsL, OpsH;
+  const SDLoc &dl(Op);
+
+  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
+    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
+    SDValue TV = DAG.getValueType(Ty);
+    return std::make_pair(TV, TV);
+  };
+
+  for (SDValue A : Op.getNode()->ops()) {
+    VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
+                    ? opSplit(A, dl, DAG)
+                    : std::make_pair(A, A);
+    // Special case for type operand.
+    if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
+        P = SplitVTNode(N);
+    }
+    OpsL.push_back(P.first);
+    OpsH.push_back(P.second);
+  }
+
+  MVT ResTy = ty(Op);
+  MVT HalfTy = typeSplit(ResTy).first;
+  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
+  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
+  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
+  return S;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
+  unsigned Opc = Op.getOpcode();
+  bool IsPairOp = isHvxPairTy(ty(Op)) ||
+                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
+                    return isHvxPairTy(ty(V));
+                  });
+
+  if (IsPairOp) {
+    switch (Opc) {
+      default:
+        break;
+      case ISD::MUL:
+      case ISD::MULHS:
+      case ISD::MULHU:
+      case ISD::AND:
+      case ISD::OR:
+      case ISD::XOR:
+      case ISD::SRA:
+      case ISD::SHL:
+      case ISD::SRL:
+      case ISD::SETCC:
+      case ISD::VSELECT:
+      case ISD::SIGN_EXTEND_INREG:
+        return SplitHvxPairOp(Op, DAG);
+    }
+  }
+
+  switch (Opc) {
+    default:
+      break;
+    case ISD::CONCAT_VECTORS:           return LowerCONCAT_VECTORS(Op, DAG);
+    case ISD::INSERT_SUBVECTOR:         return LowerINSERT_SUBVECTOR(Op, DAG);
+    case ISD::INSERT_VECTOR_ELT:        return LowerINSERT_VECTOR_ELT(Op, DAG);
+    case ISD::EXTRACT_SUBVECTOR:        return LowerEXTRACT_SUBVECTOR(Op, DAG);
+    case ISD::EXTRACT_VECTOR_ELT:       return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+    case ISD::BUILD_VECTOR:             return LowerBUILD_VECTOR(Op, DAG);
+    case ISD::VECTOR_SHUFFLE:           return LowerVECTOR_SHUFFLE(Op, DAG);
+    case ISD::ANY_EXTEND:               return LowerANY_EXTEND(Op, DAG);
+    case ISD::SIGN_EXTEND:              return LowerSIGN_EXTEND(Op, DAG);
+    case ISD::ZERO_EXTEND:              return LowerZERO_EXTEND(Op, DAG);
+    case ISD::SRA:
+    case ISD::SHL:
+    case ISD::SRL:                      return LowerVECTOR_SHIFT(Op, DAG);
+    case ISD::MUL:                      return LowerHvxMul(Op, DAG);
+    case ISD::MULHS:
+    case ISD::MULHU:                    return LowerHvxMulh(Op, DAG);
+    case ISD::ANY_EXTEND_VECTOR_INREG:  return LowerHvxExtend(Op, DAG);
+    case ISD::SETCC:
+    case ISD::INTRINSIC_VOID:           return Op;
+  }
+#ifndef NDEBUG
+  Op.dumpr(&DAG);
+#endif
+  llvm_unreachable("Unhandled HVX operation");
+}
+
+bool
+HexagonTargetLowering::isHvxOperation(SDValue Op) const {
+  // If the type of the result, or any operand type are HVX vector types,
+  // this is an HVX operation.
+  return Subtarget.isHVXVectorType(ty(Op)) ||
+         llvm::any_of(Op.getNode()->ops(),
+                      [this] (SDValue V) {
+                        return Subtarget.isHVXVectorType(ty(V), true);
+                      });
+}
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@ -2977,7 +2977,10 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;


-def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+def SDTVecLeaf:
+  SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+def SDTVecBinOp:
+  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;

 def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
  [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
@ -2987,18 +2990,36 @@ def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
  [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
 def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;

-def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt),
-  (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>;
+def HwLen2: SDNodeXForm<imm, [{
+  const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
+  return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
+}]>;
+
+def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;
+
+def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
+  (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;
+
+def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
+  (V6_vandvrt
+    (V6_vor
+      (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
+               (A2_tfrsi (HwLen2 (i32 0)))),  // Half the vector length
+      (V6_vpackeb (V6_vd0), (Q2V $Qt))),
+    (A2_tfrsi -1))>;

 def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
 def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;

 def HexagonVZERO:  SDNode<"HexagonISD::VZERO",  SDTVecLeaf>;
+def HexagonQCAT:   SDNode<"HexagonISD::QCAT",   SDTVecBinOp>;
 def HexagonQTRUE:  SDNode<"HexagonISD::QTRUE",  SDTVecLeaf>;
 def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
 def vzero:  PatFrag<(ops), (HexagonVZERO)>;
 def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
 def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
+def qcat:   PatFrag<(ops node:$Qs, node:$Qt),
+                    (HexagonQCAT node:$Qs, node:$Qt)>;

 def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;

@ -3021,9 +3042,13 @@ def SplatH: SDNodeXForm<imm, [{
 }]>;

 let Predicates = [UseHVX] in {
-  def: Pat<(VecI8  vzero), (V6_vd0)>;
-  def: Pat<(VecI16 vzero), (V6_vd0)>;
-  def: Pat<(VecI32 vzero), (V6_vd0)>;
+  def: Pat<(VecI8   vzero), (V6_vd0)>;
+  def: Pat<(VecI16  vzero), (V6_vd0)>;
+  def: Pat<(VecI32  vzero), (V6_vd0)>;
+  // Use V6_vsubw_dv instead.
+  def: Pat<(VecPI8  vzero), (Combinev (V6_vd0), (V6_vd0))>;
+  def: Pat<(VecPI16 vzero), (Combinev (V6_vd0), (V6_vd0))>;
+  def: Pat<(VecPI32 vzero), (Combinev (V6_vd0), (V6_vd0))>;

  def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
@ -3032,6 +3057,9 @@ let Predicates = [UseHVX] in {
  def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
           (Combinev HvxVR:$Vt, HvxVR:$Vs)>;

+  def: Pat<(VecQ8  (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qs, $Qt)>;
+  def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qs, $Qt)>;
+
  def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
           (V6_extractw HvxVR:$Vu, I32:$Rs)>;
  def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
@ -3053,6 +3081,14 @@ let Predicates = [UseHVX] in {
             (V6_lvsplatw (ToI32 (SplatH $V)))>;
    def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)),
             (V6_lvsplatw (ToI32 $V))>;
+    def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)),
+             (Combinev (V6_lvsplatw (ToI32 (SplatB $V))),
+                       (V6_lvsplatw (ToI32 (SplatB $V))))>;
+    def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)),
+             (Combinev (V6_lvsplatw (ToI32 (SplatH $V))),
+                       (V6_lvsplatw (ToI32 (SplatH $V))))>;
+    def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)),
+             (Combinev (V6_lvsplatw (ToI32 $V)), (V6_lvsplatw (ToI32 $V)))>;
  }
  def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)),
           (V6_lvsplatw (S2_vsplatrb I32:$Rs))>;
@ -3060,14 +3096,28 @@ let Predicates = [UseHVX] in {
           (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs))>;
  def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)),
           (V6_lvsplatw I32:$Rs)>;
+  def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw (S2_vsplatrb I32:$Rs)),
+                     (V6_lvsplatw (S2_vsplatrb I32:$Rs)))>;
+  def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)),
+                     (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)))>;
+  def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)),
+           (Combinev (V6_lvsplatw I32:$Rs), (V6_lvsplatw I32:$Rs))>;

-  def: Pat<(add HVI8:$Vs,  HVI8:$Vt),   (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(add HVI16:$Vs, HVI16:$Vt),  (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(add HVI32:$Vs, HVI32:$Vt),  (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI8:$Vs,  HVI8:$Vt),   (V6_vaddb    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI16:$Vs, HVI16:$Vt),  (V6_vaddh    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HVI32:$Vs, HVI32:$Vt),  (V6_vaddw    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(add HWI8:$Vs,  HWI8:$Vt),   (V6_vaddb_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(add HWI16:$Vs, HWI16:$Vt),  (V6_vaddh_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(add HWI32:$Vs, HWI32:$Vt),  (V6_vaddw_dv HvxWR:$Vs, HvxWR:$Vt)>;

-  def: Pat<(sub HVI8:$Vs,  HVI8:$Vt),   (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(sub HVI16:$Vs, HVI16:$Vt),  (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>;
-  def: Pat<(sub HVI32:$Vs, HVI32:$Vt),  (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI8:$Vs,  HVI8:$Vt),   (V6_vsubb    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI16:$Vs, HVI16:$Vt),  (V6_vsubh    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HVI32:$Vs, HVI32:$Vt),  (V6_vsubw    HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(sub HWI8:$Vs,  HWI8:$Vt),   (V6_vsubb_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(sub HWI16:$Vs, HWI16:$Vt),  (V6_vsubh_dv HvxWR:$Vs, HvxWR:$Vt)>;
+  def: Pat<(sub HWI32:$Vs, HWI32:$Vt),  (V6_vsubw_dv HvxWR:$Vs, HvxWR:$Vt)>;

  def: Pat<(and HVI8:$Vs,  HVI8:$Vt),   (V6_vand  HvxVR:$Vs, HvxVR:$Vt)>;
  def: Pat<(or  HVI8:$Vs,  HVI8:$Vt),   (V6_vor   HvxVR:$Vs, HvxVR:$Vt)>;
@ -3096,11 +3146,19 @@ let Predicates = [UseHVX] in {
  def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
  def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
           (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
+  def: Pat<(VecPI16 (sext_invec HWI8:$Vss)),  (VSxtb (LoVec $Vss))>;
+  def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
+  def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
+           (VSxth (LoVec (VSxtb (LoVec $Vss))))>;

  def: Pat<(VecI16 (zext_invec HVI8:$Vs)),  (LoVec (VZxtb $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
  def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
           (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+  def: Pat<(VecPI16 (zext_invec HWI8:$Vss)),  (VZxtb (LoVec $Vss))>;
+  def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
+  def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
+           (VZxth (LoVec (VZxtb (LoVec $Vss))))>;

  // The "source" types are not legal, and there are no parameterized
  // definitions for them, but they are length-specific.
@ -3121,6 +3179,16 @@ let Predicates = [UseHVX] in {
             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
  }

+  def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+  def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
+  def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
+           (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+                       (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+
  def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
  def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-anyext-pair.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-anyext-pair.ll
@ -0,0 +1,25 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this compiles successfully.
+; CHECK: vunpack
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  %v1 = load <64 x i8>, <64 x i8>* undef, align 1
+  %v2 = sext <64 x i8> %v1 to <64 x i32>
+  %v3 = load <64 x i8>, <64 x i8>* null, align 1
+  %v4 = sext <64 x i8> %v3 to <64 x i32>
+  %v5 = mul nsw <64 x i32> %v4, %v2
+  %v6 = add nsw <64 x i32> %v5, zeroinitializer
+  %v7 = shl <64 x i32> %v6, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %v8 = ashr exact <64 x i32> %v7, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %v9 = mul nsw <64 x i32> %v8, %v8
+  %v10 = trunc <64 x i32> %v9 to <64 x i8>
+  store <64 x i8> %v10, <64 x i8>* undef, align 1
+  unreachable
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-shift-byte.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-shift-byte.ll
@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this compiles successfully.
+; CHECK: vasl
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  br label %b1
+
+b1:                                               ; preds = %b9, %b0
+  %v2 = phi <64 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>, %b0 ], [ zeroinitializer, %b9 ]
+  %v3 = add <64 x i32> %v2, <i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512, i32 512>
+  %v4 = shl <64 x i32> %v3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %v5 = trunc <64 x i32> %v4 to <64 x i8>
+  %v6 = xor <64 x i8> %v5, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  %v7 = sub <64 x i8> zeroinitializer, %v6
+  store <64 x i8> %v7, <64 x i8>* undef, align 64
+  br i1 false, label %b8, label %b9
+
+b8:                                               ; preds = %b1
+  ret void
+
+b9:                                               ; preds = %b1
+  br label %b1
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-vsplat-pair.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-vsplat-pair.ll
@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this compiles successfully.
+; CHECK: vsplat
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+  %v1 = load <64 x i8>, <64 x i8>* undef, align 8
+  %v2 = zext <64 x i8> %v1 to <64 x i32>
+  %v3 = add nuw nsw <64 x i32> %v2, zeroinitializer
+  %v4 = icmp ugt <64 x i32> %v3, <i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254, i32 254>
+  %v5 = zext <64 x i1> %v4 to <64 x i32>
+  %v6 = add nuw nsw <64 x i32> %v3, %v5
+  %v7 = trunc <64 x i32> %v6 to <64 x i8>
+  store <64 x i8> %v7, <64 x i8>* undef, align 8
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }