From 0906dca493b3cec36d7fc3fe9f079c0a5288515d Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Fri, 21 Feb 2020 17:54:47 -0800
Subject: [PATCH] [WebAssembly] Simplify extract_vector lowering

Summary:
Removes patterns that were not doing useful work, changes the
default extract instructions to be the unsigned versions now that
they are enabled by default, fixes PR44988, and adds tests for
sext_inreg lowering.

Reviewers: aheejin

Reviewed By: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75005
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  67 +++++------
 .../WebAssembly/WebAssemblyInstrSIMD.td       | 102 ++++++-----------
 llvm/test/CodeGen/WebAssembly/simd-arith.ll   |  32 +++---
 .../CodeGen/WebAssembly/simd-sext-inreg.ll    | 105 +++++++++++++++---
 llvm/test/CodeGen/WebAssembly/simd.ll         |   8 +-
 5 files changed, 181 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index d427784d06da..b1e0de56c244 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1314,39 +1314,42 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc DL(Op);
   // If sign extension operations are disabled, allow sext_inreg only if operand
-  // is a vector extract. SIMD does not depend on sign extension operations, but
-  // allowing sext_inreg in this context lets us have simple patterns to select
-  // extract_lane_s instructions. Expanding sext_inreg everywhere would be
-  // simpler in this file, but would necessitate large and brittle patterns to
-  // undo the expansion and select extract_lane_s instructions.
+  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
+  // extension operations, but allowing sext_inreg in this context lets us have
+  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
+  // everywhere would be simpler in this file, but would necessitate large and
+  // brittle patterns to undo the expansion and select extract_lane_s
+  // instructions.
   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
-  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    const SDValue &Extract = Op.getOperand(0);
-    MVT VecT = Extract.getOperand(0).getSimpleValueType();
-    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
-                             ->getVT()
-                             .getSimpleVT();
-    MVT ExtractedVecT =
-        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
-    if (ExtractedVecT == VecT)
-      return Op;
-    // Bitcast vector to appropriate type to ensure ISel pattern coverage
-    const SDValue &Index = Extract.getOperand(1);
-    unsigned IndexVal =
-        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
-    unsigned Scale =
-        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
-    assert(Scale > 1);
-    SDValue NewIndex =
-        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
-    SDValue NewExtract = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
-        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
-    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
-                       NewExtract, Op.getOperand(1));
-  }
-  // Otherwise expand
-  return SDValue();
+  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  const SDValue &Extract = Op.getOperand(0);
+  MVT VecT = Extract.getOperand(0).getSimpleValueType();
+  if (VecT.getVectorElementType().getSizeInBits() > 32)
+    return SDValue();
+  MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
+                           ->getVT()
+                           .getSimpleVT();
+  MVT ExtractedVecT =
+      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+  if (ExtractedVecT == VecT)
+    return Op;
+
+  // Bitcast vector to appropriate type to ensure ISel pattern coverage
+  const SDValue &Index = Extract.getOperand(1);
+  unsigned IndexVal =
+      static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
+  unsigned Scale =
+      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+  assert(Scale > 1);
+  SDValue NewIndex =
+      DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
+  SDValue NewExtract = DAG.getNode(
+      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
+                     Op.getOperand(1));
 }
 
 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 5aab359df76f..144b7f6ca23e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -330,81 +330,49 @@ def : ScalarSplatPat<v2f64, f64, F64>;
 //===----------------------------------------------------------------------===//
 
 // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
-multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
-                       WebAssemblyRegClass reg_t, bits<32> simdop,
-                       string suffix = "", SDNode extract = vector_extract> {
+multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+                       bits<32> simdop, string suffix = ""> {
   defm EXTRACT_LANE_#vec_t#suffix :
       SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
-             (outs), (ins vec_i8imm_op:$idx),
-             [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+             (outs), (ins vec_i8imm_op:$idx), [],
              vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
              vec#".extract_lane"#suffix#"\t$idx", simdop>;
 }
 
-multiclass ExtractPat<ValueType lane_t, int mask> {
-  def _s : PatFrag<(ops node:$vec, node:$idx),
-                   (i32 (sext_inreg
-                     (i32 (vector_extract
-                       node:$vec,
-                       node:$idx
-                     )),
-                     lane_t
-                   ))>;
-  def _u : PatFrag<(ops node:$vec, node:$idx),
-                   (i32 (and
-                     (i32 (vector_extract
-                       node:$vec,
-                       node:$idx
-                     )),
-                     (i32 mask)
-                   ))>;
-}
+defm "" : ExtractLane<v16i8, "i8x16", I32, 5, "_s">;
+defm "" : ExtractLane<v16i8, "i8x16", I32, 6, "_u">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 9, "_s">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 10, "_u">;
+defm "" : ExtractLane<v4i32, "i32x4", I32, 13>;
+defm "" : ExtractLane<v2i64, "i64x2", I64, 16>;
+defm "" : ExtractLane<v4f32, "f32x4", F32, 19>;
+defm "" : ExtractLane<v2f64, "f64x2", F64, 22>;
 
-defm extract_i8x16 : ExtractPat<i8, 0xff>;
-defm extract_i16x8 : ExtractPat<i16, 0xffff>;
+def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
+          (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
+          (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
+          (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
+          (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
+          (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
+          (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;
 
-multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
-  defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
-                        !cast<PatFrag>("extract_i8x16"#sign)>;
-  defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
-                        !cast<PatFrag>("extract_i16x8"#sign)>;
-}
-
-defm "" : ExtractLaneExtended<"_s", 5>;
-defm "" : ExtractLaneExtended<"_u", 6>;
-defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
-defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
-defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
-defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
-
-// It would be more conventional to use unsigned extracts, but v8
-// doesn't implement them yet
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
-          (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
-          (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
-
-// Lower undef lane indices to zero
-def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
-          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
-          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
-          (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
-          (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
-          (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
-          (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
-def : Pat<(vector_extract (v4i32 V128:$vec), undef),
-          (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2i64 V128:$vec), undef),
-          (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
-def : Pat<(vector_extract (v4f32 V128:$vec), undef),
-          (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2f64 V128:$vec), undef),
-          (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+def : Pat<
+  (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
+  (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
+def : Pat<
+  (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
+  (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<
+  (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
+  (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
+def : Pat<
+  (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
+  (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
 
 // Replace lane value: replace_lane
 multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 19d67604c2da..4ccc6f8b613f 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -160,15 +160,15 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
 ; CHECK-LABEL: shl_vec_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
-; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@@ -197,14 +197,14 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@@ -233,14 +233,14 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
 ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
 ; Skip 14 lanes
 ; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@@ -470,15 +470,15 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 ; CHECK-LABEL: shl_vec_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
-; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@@ -506,14 +506,14 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@@ -541,14 +541,14 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
 ; Skip 6 lanes
 ; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll
index 0c375827c0f5..13f9ca14812d 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
-; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
 
 ; Test that vector sign extensions lower to shifts
@@ -7,55 +6,133 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; CHECK-LABEL: sext_inreg_v16i8:
+; CHECK-LABEL: sext_v16i8:
 ; NO-SIMD128-NOT: i8x16
-; SIMD128-NEXT: .functype sext_inreg_v16i8 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v16i8 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i8x16.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 7{{$}}
 ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <16 x i8> @sext_inreg_v16i8(<16 x i1> %x) {
+define <16 x i8> @sext_v16i8(<16 x i1> %x) {
   %res = sext <16 x i1> %x to <16 x i8>
   ret <16 x i8> %res
 }
 
-; CHECK-LABEL: sext_inreg_v8i16:
+; CHECK-LABEL: sext_v8i16:
 ; NO-SIMD128-NOT: i16x8
-; SIMD128-NEXT: .functype sext_inreg_v8i16 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v8i16 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i16x8.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 15{{$}}
 ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <8 x i16> @sext_inreg_v8i16(<8 x i1> %x) {
+define <8 x i16> @sext_v8i16(<8 x i1> %x) {
   %res = sext <8 x i1> %x to <8 x i16>
   ret <8 x i16> %res
 }
 
-; CHECK-LABEL: sext_inreg_v4i32:
+; CHECK-LABEL: sext_v4i32:
 ; NO-SIMD128-NOT: i32x4
-; SIMD128-NEXT: .functype sext_inreg_v4i32 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v4i32 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 31{{$}}
 ; SIMD128-NEXT: i32x4.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 31{{$}}
 ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x i32> @sext_inreg_v4i32(<4 x i1> %x) {
+define <4 x i32> @sext_v4i32(<4 x i1> %x) {
   %res = sext <4 x i1> %x to <4 x i32>
   ret <4 x i32> %res
 }
 
-; CHECK-LABEL: sext_inreg_v2i64:
+; CHECK-LABEL: sext_v2i64:
 ; NO-SIMD128-NOT: i64x2
-; SDIM128-VM-NOT: i64x2
-; SIMD128-NEXT: .functype sext_inreg_v2i64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: .functype sext_v2i64 (v128) -> (v128){{$}}
 ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 63{{$}}
 ; SIMD128-NEXT: i64x2.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
 ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 63{{$}}
 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x i64> @sext_inreg_v2i64(<2 x i1> %x) {
+define <2 x i64> @sext_v2i64(<2 x i1> %x) {
   %res = sext <2 x i1> %x to <2 x i64>
   ret <2 x i64> %res
 }
+
+; CHECK-LABEL: sext_inreg_i8_to_i16:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i16 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i16 @sext_inreg_i8_to_i16(<8 x i16> %x) {
+  %lane = extractelement <8 x i16> %x, i32 1
+  %a = shl i16 %lane, 8
+  %res = ashr i16 %a, 8
+  ret i16 %res
+}
+
+; CHECK-LABEL: sext_inreg_i8_to_i32:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i32 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 4{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @sext_inreg_i8_to_i32(<4 x i32> %x) {
+  %lane = extractelement <4 x i32> %x, i32 1
+  %a = shl i32 %lane, 24
+  %res = ashr i32 %a, 24
+  ret i32 %res
+}
+
+; CHECK-LABEL: sext_inreg_i16_to_i32:
+; SIMD128-NEXT: .functype sext_inreg_i16_to_i32 (v128) -> (i32){{$}}
+; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i32 @sext_inreg_i16_to_i32(<4 x i32> %x) {
+  %lane = extractelement <4 x i32> %x, i32 1
+  %a = shl i32 %lane, 16
+  %res = ashr i32 %a, 16
+  ret i32 %res
+}
+
+; CHECK-LABEL: sext_inreg_i8_to_i64:
+; SIMD128-NEXT: .functype sext_inreg_i8_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 56{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 56{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i8_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 56
+  %res = ashr i64 %a, 56
+  ret i64 %res
+}
+
+; CHECK-LABEL: sext_inreg_i16_to_i64:
+; SIMD128-NEXT: .functype sext_inreg_i16_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 48{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 48{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i16_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 48
+  %res = ashr i64 %a, 48
+  ret i64 %res
+}
+
+; CHECK-LABEL: sext_inreg_i32_to_i64:
+; NO-SIMD128-NOT: i64x2
+; SIMD128-NEXT: .functype sext_inreg_i32_to_i64 (v128) -> (i64){{$}}
+; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
+; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 32{{$}}
+; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 32{{$}}
+; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define i64 @sext_inreg_i32_to_i64(<2 x i64> %x) {
+  %lane = extractelement <2 x i64> %x, i32 1
+  %a = shl i64 %lane, 32
+  %res = ashr i64 %a, 32
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index df0b1a1417c2..2934d2c9beac 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -127,7 +127,7 @@ define i32 @extract_undef_v16i8_u(<16 x i8> %v) {
 ; CHECK-LABEL: extract_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype extract_v16i8 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 13{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i8 @extract_v16i8(<16 x i8> %v) {
   %elem = extractelement <16 x i8> %v, i8 13
@@ -155,7 +155,7 @@ define i8 @extract_var_v16i8(<16 x i8> %v, i32 %i) {
 ; CHECK-LABEL: extract_undef_v16i8:
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype extract_undef_v16i8 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i8 @extract_undef_v16i8(<16 x i8> %v) {
   %elem = extractelement <16 x i8> %v, i8 undef
@@ -393,7 +393,7 @@ define i32 @extract_undef_v8i16_u(<8 x i16> %v) {
 ; CHECK-LABEL: extract_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype extract_v8i16 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 5{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i16 @extract_v8i16(<8 x i16> %v) {
   %elem = extractelement <8 x i16> %v, i16 5
@@ -423,7 +423,7 @@ define i16 @extract_var_v8i16(<8 x i16> %v, i32 %i) {
 ; CHECK-LABEL: extract_undef_v8i16:
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype extract_undef_v8i16 (v128) -> (i32){{$}}
-; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define i16 @extract_undef_v8i16(<8 x i16> %v) {
   %elem = extractelement <8 x i16> %v, i16 undef