[Hexagon] Handle HVX/FP shuffles, insertion and extraction

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com>
2021-12-30 08:40:49 -08:00 · 2021-12-30 08:40:49 -08:00 · 23423638cc
parent 95c7dd8810
commit 23423638cc
2 changed files with 38 additions and 0 deletions
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -91,15 +91,26 @@ HexagonTargetLowering::initializeHVXLowering() {

  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {
+    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v64f16, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
+    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v32f32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+
    // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
    setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);

    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool
    setOperationAction(ISD::BUILD_VECTOR,      MVT::f16,    Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16,    Custom);
    setOperationAction(ISD::SPLAT_VECTOR,      MVT::f16,    Custom);
    setOperationAction(ISD::SPLAT_VECTOR,      MVT::v64f16, Legal);
    setOperationAction(ISD::SPLAT_VECTOR,      MVT::v32f32, Legal);
+    // Vector shuffles of FP vectors are promoted to a byte vector type (ByteV
+    // or ByteW), and the result is bitcast back to the original FP type.
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
@@ -1483,6 +1494,7 @@ SDValue
 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
      const {
  const SDLoc &dl(Op);
+  MVT VecTy = ty(Op);
  SDValue VecV = Op.getOperand(0);
  SDValue ValV = Op.getOperand(1);
  SDValue IdxV = Op.getOperand(2);
@@ -1490,6 +1502,14 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
  if (ElemTy == MVT::i1)
    return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);

+  if (ElemTy == MVT::f16) {
+    SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+        tyVector(VecTy, MVT::i16),
+        DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
+        DAG.getBitcast(MVT::i16, ValV), IdxV);
+    return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+  }
+
  return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
 }

--- a/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/hfinsert.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that we generate a proper vinsert instruction for f16 types.
+; CHECK: vinsert
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define half* @fred(half* %v0) local_unnamed_addr #0 {
+b0:
+  %t1 = bitcast half* %v0 to <64 x half>*
+  %v1 = load <64 x half>, <64 x half>* %t1, align 2
+  %v2 = insertelement <64 x half> %v1, half 0xH4170, i32 17
+  store <64 x half> %v2, <64 x half>* %t1, align 2
+  %t2 = bitcast <64 x half>* %t1 to half*
+  ret half* %t2
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv69" "target-features"="+hvx-length128b,+hvxv69,+v69,+hvx-qfloat,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }