[WebAssembly] Simplify extract_vector lowering

Summary: Removes patterns that were not doing useful work, changes the default extract instructions to be the unsigned versions now that they are enabled by default, fixes PR44988, and adds tests for sext_inreg lowering. Reviewers: aheejin Reviewed By: aheejin Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D75005
2020-02-21 17:54:47 -08:00 · 2020-02-21 17:54:47 -08:00 · 0906dca493
parent 9c54f6154f
commit 0906dca493
5 changed files with 181 additions and 133 deletions
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@ -1314,15 +1314,20 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
-  // is a vector extract. SIMD does not depend on sign extension operations, but
-  // allowing sext_inreg in this context lets us have simple patterns to select
-  // extract_lane_s instructions. Expanding sext_inreg everywhere would be
-  // simpler in this file, but would necessitate large and brittle patterns to
-  // undo the expansion and select extract_lane_s instructions.
+  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
+  // extension operations, but allowing sext_inreg in this context lets us have
+  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
+  // everywhere would be simpler in this file, but would necessitate large and
+  // brittle patterns to undo the expansion and select extract_lane_s
+  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
-  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
+  if (VecT.getVectorElementType().getSizeInBits() > 32)
+    return SDValue();
  MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
                           ->getVT()
                           .getSimpleVT();
@ -1330,6 +1335,7 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;
+
  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDValue &Index = Extract.getOperand(1);
  unsigned IndexVal =
@ -1342,11 +1348,8 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
-    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
-                       NewExtract, Op.getOperand(1));
-  }
-  // Otherwise expand
-  return SDValue();
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
+                     Op.getOperand(1));
 }

 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@ -330,81 +330,49 @@ def : ScalarSplatPat<v2f64, f64, F64>;
 //===----------------------------------------------------------------------===//

 // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
-multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
-                       WebAssemblyRegClass reg_t, bits<32> simdop,
-                       string suffix = "", SDNode extract = vector_extract> {
+multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+                       bits<32> simdop, string suffix = ""> {
  defm EXTRACT_LANE_#vec_t#suffix :
      SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
-             (outs), (ins vec_i8imm_op:$idx),
-             [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+             (outs), (ins vec_i8imm_op:$idx), [],
             vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
             vec#".extract_lane"#suffix#"\t$idx", simdop>;
 }

-multiclass ExtractPat<ValueType lane_t, int mask> {
-  def _s : PatFrag<(ops node:$vec, node:$idx),
-                   (i32 (sext_inreg
-                     (i32 (vector_extract
-                       node:$vec,
-                       node:$idx
-                     )),
-                     lane_t
-                   ))>;
-  def _u : PatFrag<(ops node:$vec, node:$idx),
-                   (i32 (and
-                     (i32 (vector_extract
-                       node:$vec,
-                       node:$idx
-                     )),
-                     (i32 mask)
-                   ))>;
-}
+defm "" : ExtractLane<v16i8, "i8x16", I32, 5, "_s">;
+defm "" : ExtractLane<v16i8, "i8x16", I32, 6, "_u">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 9, "_s">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 10, "_u">;
+defm "" : ExtractLane<v4i32, "i32x4", I32, 13>;
+defm "" : ExtractLane<v2i64, "i64x2", I64, 16>;
+defm "" : ExtractLane<v4f32, "f32x4", F32, 19>;
+defm "" : ExtractLane<v2f64, "f64x2", F64, 22>;

-defm extract_i8x16 : ExtractPat<i8, 0xff>;
-defm extract_i16x8 : ExtractPat<i16, 0xffff>;
+def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
+          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
+          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
+          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
+          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
+          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
+          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;

-multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
-  defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
-                        !cast<PatFrag>("extract_i8x16"#sign)>;
-  defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
-                        !cast<PatFrag>("extract_i16x8"#sign)>;
-}
-
-defm "" : ExtractLaneExtended<"_s", 5>;
-defm "" : ExtractLaneExtended<"_u", 6>;
-defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
-defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
-defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
-defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
-
-// It would be more conventional to use unsigned extracts, but v8
-// doesn't implement them yet
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
-          (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
-          (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
-
-// Lower undef lane indices to zero
-def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
-          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
-          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
-          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
-          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
-          (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
-          (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
-def : Pat<(vector_extract (v4i32 V128:$vec), undef),
-          (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2i64 V128:$vec), undef),
-          (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
-def : Pat<(vector_extract (v4f32 V128:$vec), undef),
-          (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2f64 V128:$vec), undef),
-          (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+def : Pat<
+  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
+  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
+def : Pat<
+  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
+  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<
+  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
+  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
+def : Pat<
+  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
+  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;

 // Replace lane value: replace_lane
 multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@ -160,15 +160,15 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
 ; CHECK-LABEL: shl_vec_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
-; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -197,14 +197,14 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -233,14 +233,14 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
 ; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -470,15 +470,15 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 ; CHECK-LABEL: shl_vec_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
-; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -506,14 +506,14 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -541,14 +541,14 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
 ; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
--- a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll
@ -1,5 +1,4 @@
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128

 ; Test that vector sign extensions lower to shifts
@ -7,55 +6,133 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"

-; CHECK-LABEL: sext_inreg_v16i8:
+; CHECK-LABEL: sext_v16i8:
 ; NO-SIMD128-NOT: i8x16
-; SIMD128-NEXT: .functype sext_inreg_v16i8 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v16i8 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i8x16.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <16 x i8> @sext_inreg_v16i8(<16 x i1> %x) {
+define <16 x i8> @sext_v16i8(<16 x i1> %x) {
  %res = sext <16 x i1> %x to <16 x i8>
  ret <16 x i8> %res
 }

-; CHECK-LABEL: sext_inreg_v8i16:
+; CHECK-LABEL: sext_v8i16:
 ; NO-SIMD128-NOT: i16x8
-; SIMD128-NEXT: .functype sext_inreg_v8i16 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v8i16 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i16x8.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <8 x i16> @sext_inreg_v8i16(<8 x i1> %x) {
+define <8 x i16> @sext_v8i16(<8 x i1> %x) {
  %res = sext <8 x i1> %x to <8 x i16>
  ret <8 x i16> %res
 }

-; CHECK-LABEL: sext_inreg_v4i32:
+; CHECK-LABEL: sext_v4i32:
 ; NO-SIMD128-NOT: i32x4
-; SIMD128-NEXT: .functype sext_inreg_v4i32 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v4i32 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 31{{$}}
 ; SIMD128-NEXT: i32x4.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 31{{$}}
 ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x i32> @sext_inreg_v4i32(<4 x i1> %x) {
+define <4 x i32> @sext_v4i32(<4 x i1> %x) {
  %res = sext <4 x i1> %x to <4 x i32>
  ret <4 x i32> %res
 }

-; CHECK-LABEL: sext_inreg_v2i64:
+; CHECK-LABEL: sext_v2i64:
 ; NO-SIMD128-NOT: i64x2
-; SDIM128-VM-NOT: i64x2
-; SIMD128-NEXT: .functype sext_inreg_v2i64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v2i64 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 63{{$}}
 ; SIMD128-NEXT: i64x2.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 63{{$}}
 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x i64> @sext_inreg_v2i64(<2 x i1> %x) {
+define <2 x i64> @sext_v2i64(<2 x i1> %x) {
  %res = sext <2 x i1> %x to <2 x i64>
  ret <2 x i64> %res
 }
+
+; CHECK-LABEL: sext_inreg_i8_to_i16:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i16 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i16 @sext_inreg_i8_to_i16(<8 x i16> %x) {
+  %lane = extractelement <8 x i16> %x, i32 1
+  %a = shl i16 %lane, 8
+  %res = ashr i16 %a, 8
+  ret i16 %res
+}
+
+; CHECK-LABEL: sext_inreg_i8_to_i32:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i32 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 4{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @sext_inreg_i8_to_i32(<4 x i32> %x) {
+  %lane = extractelement <4 x i32> %x, i32 1
+  %a = shl i32 %lane, 24
+  %res = ashr i32 %a, 24
+  ret i32 %res
+}
+
+; CHECK-LABEL: sext_inreg_i16_to_i32:
+; SIMD128-NEXT: .functype sext_inreg_i16_to_i32 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @sext_inreg_i16_to_i32(<4 x i32> %x) {
+  %lane = extractelement <4 x i32> %x, i32 1
+  %a = shl i32 %lane, 16
+  %res = ashr i32 %a, 16
+  ret i32 %res
+}
+
+; CHECK-LABEL: sext_inreg_i8_to_i64:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 56{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 56{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i8_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 56
+  %res = ashr i64 %a, 56
+  ret i64 %res
+}
+
+; CHECK-LABEL: sext_inreg_i16_to_i64:
+; SIMD128-NEXT: .functype sext_inreg_i16_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 48{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 48{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i16_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 48
+  %res = ashr i64 %a, 48
+  ret i64 %res
+}
+
+; CHECK-LABEL: sext_inreg_i32_to_i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype sext_inreg_i32_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 32{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 32{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i32_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 32
+  %res = ashr i64 %a, 32
+  ret i64 %res
+}
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@ -127,7 +127,7 @@ define i32 @extract_undef_v16i8_u(<16 x i8> %v) {
 ; CHECK-LABEL: extract_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype extract_v16i8 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 13{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i8 @extract_v16i8(<16 x i8> %v) {
  %elem = extractelement <16 x i8> %v, i8 13
@ -155,7 +155,7 @@ define i8 @extract_var_v16i8(<16 x i8> %v, i32 %i) {
 ; CHECK-LABEL: extract_undef_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype extract_undef_v16i8 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i8 @extract_undef_v16i8(<16 x i8> %v) {
  %elem = extractelement <16 x i8> %v, i8 undef
@ -393,7 +393,7 @@ define i32 @extract_undef_v8i16_u(<8 x i16> %v) {
 ; CHECK-LABEL: extract_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype extract_v8i16 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 5{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i16 @extract_v8i16(<8 x i16> %v) {
  %elem = extractelement <8 x i16> %v, i16 5
@ -423,7 +423,7 @@ define i16 @extract_var_v8i16(<8 x i16> %v, i32 %i) {
 ; CHECK-LABEL: extract_undef_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype extract_undef_v8i16 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i16 @extract_undef_v8i16(<8 x i16> %v) {
  %elem = extractelement <8 x i16> %v, i16 undef