[WebAssembly] Simplify extract_vector lowering

Summary:
Removes patterns that were not doing useful work, changes the
default extract instructions to be the unsigned versions now that
they are enabled by default, fixes PR44988, and adds tests for
sext_inreg lowering.

Reviewers: aheejin

Reviewed By: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75005
This commit is contained in:
Thomas Lively 2020-02-21 17:54:47 -08:00
parent 9c54f6154f
commit 0906dca493
5 changed files with 181 additions and 133 deletions

View File

@ -1314,15 +1314,20 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If sign extension operations are disabled, allow sext_inreg only if operand
// is a vector extract. SIMD does not depend on sign extension operations, but
// allowing sext_inreg in this context lets us have simple patterns to select
// extract_lane_s instructions. Expanding sext_inreg everywhere would be
// simpler in this file, but would necessitate large and brittle patterns to
// undo the expansion and select extract_lane_s instructions.
// is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
// extension operations, but allowing sext_inreg in this context lets us have
// simple patterns to select extract_lane_s instructions. Expanding sext_inreg
// everywhere would be simpler in this file, but would necessitate large and
// brittle patterns to undo the expansion and select extract_lane_s
// instructions.
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
const SDValue &Extract = Op.getOperand(0);
MVT VecT = Extract.getOperand(0).getSimpleValueType();
if (VecT.getVectorElementType().getSizeInBits() > 32)
return SDValue();
MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
->getVT()
.getSimpleVT();
@ -1330,6 +1335,7 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
if (ExtractedVecT == VecT)
return Op;
// Bitcast vector to appropriate type to ensure ISel pattern coverage
const SDValue &Index = Extract.getOperand(1);
unsigned IndexVal =
@ -1342,11 +1348,8 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SDValue NewExtract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
NewExtract, Op.getOperand(1));
}
// Otherwise expand
return SDValue();
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
Op.getOperand(1));
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,

View File

@ -330,81 +330,49 @@ def : ScalarSplatPat<v2f64, f64, F64>;
//===----------------------------------------------------------------------===//
// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
WebAssemblyRegClass reg_t, bits<32> simdop,
string suffix = "", SDNode extract = vector_extract> {
multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
bits<32> simdop, string suffix = ""> {
defm EXTRACT_LANE_#vec_t#suffix :
SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
(outs), (ins vec_i8imm_op:$idx),
[(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
(outs), (ins vec_i8imm_op:$idx), [],
vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
vec#".extract_lane"#suffix#"\t$idx", simdop>;
}
multiclass ExtractPat<ValueType lane_t, int mask> {
def _s : PatFrag<(ops node:$vec, node:$idx),
(i32 (sext_inreg
(i32 (vector_extract
node:$vec,
node:$idx
)),
lane_t
))>;
def _u : PatFrag<(ops node:$vec, node:$idx),
(i32 (and
(i32 (vector_extract
node:$vec,
node:$idx
)),
(i32 mask)
))>;
}
defm "" : ExtractLane<v16i8, "i8x16", I32, 5, "_s">;
defm "" : ExtractLane<v16i8, "i8x16", I32, 6, "_u">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 9, "_s">;
defm "" : ExtractLane<v8i16, "i16x8", I32, 10, "_u">;
defm "" : ExtractLane<v4i32, "i32x4", I32, 13>;
defm "" : ExtractLane<v2i64, "i64x2", I64, 16>;
defm "" : ExtractLane<v4f32, "f32x4", F32, 19>;
defm "" : ExtractLane<v2f64, "f64x2", F64, 22>;
defm extract_i8x16 : ExtractPat<i8, 0xff>;
defm extract_i16x8 : ExtractPat<i16, 0xffff>;
def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
(EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
(EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
(EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
(EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
(EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
(EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;
multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
!cast<PatFrag>("extract_i8x16"#sign)>;
defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
!cast<PatFrag>("extract_i16x8"#sign)>;
}
defm "" : ExtractLaneExtended<"_s", 5>;
defm "" : ExtractLaneExtended<"_u", 6>;
defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
// It would be more conventional to use unsigned extracts, but v8
// doesn't implement them yet
def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
(EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
(EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
// Lower undef lane indices to zero
def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
(EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
(EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
(EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
(EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
(EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
(EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
def : Pat<(vector_extract (v4i32 V128:$vec), undef),
(EXTRACT_LANE_v4i32 V128:$vec, 0)>;
def : Pat<(vector_extract (v2i64 V128:$vec), undef),
(EXTRACT_LANE_v2i64 V128:$vec, 0)>;
def : Pat<(vector_extract (v4f32 V128:$vec), undef),
(EXTRACT_LANE_v4f32 V128:$vec, 0)>;
def : Pat<(vector_extract (v2f64 V128:$vec), undef),
(EXTRACT_LANE_v2f64 V128:$vec, 0)>;
def : Pat<
(sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
(EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
def : Pat<
(and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
(EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
def : Pat<
(sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
(EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
def : Pat<
(and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
(EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
// Replace lane value: replace_lane
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,

View File

@ -160,15 +160,15 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
; CHECK-LABEL: shl_vec_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
; Skip 14 lanes
; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -197,14 +197,14 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
; Skip 14 lanes
; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -233,14 +233,14 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]
; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]]
; Skip 14 lanes
; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -470,15 +470,15 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
; CHECK-LABEL: shl_vec_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 6 lanes
; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -506,14 +506,14 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 6 lanes
; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}
@ -541,14 +541,14 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}}
; Skip 6 lanes
; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}}
; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}}

View File

@ -1,5 +1,4 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
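; Test that vector sign extensions lower to shifts, and that sign-extending the low bits of an extracted lane selects extract_lane_s for 8- and 16-bit sources in 32-bit-or-narrower lanes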
@ -7,55 +6,133 @@
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: sext_inreg_v16i8:
; CHECK-LABEL: sext_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype sext_inreg_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: .functype sext_v16i8 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @sext_inreg_v16i8(<16 x i1> %x) {
define <16 x i8> @sext_v16i8(<16 x i1> %x) {
%res = sext <16 x i1> %x to <16 x i8>
ret <16 x i8> %res
}
; CHECK-LABEL: sext_inreg_v8i16:
; CHECK-LABEL: sext_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype sext_inreg_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: .functype sext_v8i16 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @sext_inreg_v8i16(<8 x i1> %x) {
define <8 x i16> @sext_v8i16(<8 x i1> %x) {
%res = sext <8 x i1> %x to <8 x i16>
ret <8 x i16> %res
}
; CHECK-LABEL: sext_inreg_v4i32:
; CHECK-LABEL: sext_v4i32:
; NO-SIMD128-NOT: i32x4
; SIMD128-NEXT: .functype sext_inreg_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: .functype sext_v4i32 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 31{{$}}
; SIMD128-NEXT: i32x4.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 31{{$}}
; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @sext_inreg_v4i32(<4 x i1> %x) {
define <4 x i32> @sext_v4i32(<4 x i1> %x) {
%res = sext <4 x i1> %x to <4 x i32>
ret <4 x i32> %res
}
; CHECK-LABEL: sext_inreg_v2i64:
; CHECK-LABEL: sext_v2i64:
; NO-SIMD128-NOT: i64x2
; SDIM128-VM-NOT: i64x2
; SIMD128-NEXT: .functype sext_inreg_v2i64 (v128) -> (v128){{$}}
; SIMD128-NEXT: .functype sext_v2i64 (v128) -> (v128){{$}}
; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 63{{$}}
; SIMD128-NEXT: i64x2.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}}
; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 63{{$}}
; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @sext_inreg_v2i64(<2 x i1> %x) {
define <2 x i64> @sext_v2i64(<2 x i1> %x) {
%res = sext <2 x i1> %x to <2 x i64>
ret <2 x i64> %res
}
; The shl/ashr-by-8 pair sign-extends the low 8 bits of 16-bit lane 1. The
; expected output is a single i8x16.extract_lane_s on byte lane 2, i.e. the
; vector is reinterpreted as sixteen 8-bit lanes and the lane index is scaled
; by 2 (the target is little-endian, so byte lane 2 is the low byte of 16-bit
; lane 1).
; CHECK-LABEL: sext_inreg_i8_to_i16:
; SIMD128-NEXT: .functype sext_inreg_i8_to_i16 (v128) -> (i32){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i16 @sext_inreg_i8_to_i16(<8 x i16> %x) {
%lane = extractelement <8 x i16> %x, i32 1
%a = shl i16 %lane, 8
%res = ashr i16 %a, 8
ret i16 %res
}
; The shl/ashr-by-24 pair sign-extends the low 8 bits of 32-bit lane 1. The
; expected output is a single i8x16.extract_lane_s on byte lane 4, i.e. the
; vector is reinterpreted as sixteen 8-bit lanes and the lane index is scaled
; by 4.
; CHECK-LABEL: sext_inreg_i8_to_i32:
; SIMD128-NEXT: .functype sext_inreg_i8_to_i32 (v128) -> (i32){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 4{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i32 @sext_inreg_i8_to_i32(<4 x i32> %x) {
%lane = extractelement <4 x i32> %x, i32 1
%a = shl i32 %lane, 24
%res = ashr i32 %a, 24
ret i32 %res
}
; The shl/ashr-by-16 pair sign-extends the low 16 bits of 32-bit lane 1. The
; expected output is a single i16x8.extract_lane_s on lane 2, i.e. the vector
; is reinterpreted as eight 16-bit lanes and the lane index is scaled by 2.
; CHECK-LABEL: sext_inreg_i16_to_i32:
; SIMD128-NEXT: .functype sext_inreg_i16_to_i32 (v128) -> (i32){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i32 @sext_inreg_i16_to_i32(<4 x i32> %x) {
%lane = extractelement <4 x i32> %x, i32 1
%a = shl i32 %lane, 16
%res = ashr i32 %a, 16
ret i32 %res
}
; Sign-extending the low 8 bits of a 64-bit lane is not expected to use
; extract_lane_s. The expected output extracts the whole 64-bit lane with
; i64x2.extract_lane and then sign-extends it in scalar code with an
; i64.shl / i64.shr_s pair by 56.
; CHECK-LABEL: sext_inreg_i8_to_i64:
; SIMD128-NEXT: .functype sext_inreg_i8_to_i64 (v128) -> (i64){{$}}
; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 56{{$}}
; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 56{{$}}
; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i64 @sext_inreg_i8_to_i64(<2 x i64> %x) {
%lane = extractelement <2 x i64> %x, i32 1
%a = shl i64 %lane, 56
%res = ashr i64 %a, 56
ret i64 %res
}
; Sign-extending the low 16 bits of a 64-bit lane is not expected to use
; extract_lane_s. The expected output extracts the whole 64-bit lane with
; i64x2.extract_lane and then sign-extends it in scalar code with an
; i64.shl / i64.shr_s pair by 48.
; CHECK-LABEL: sext_inreg_i16_to_i64:
; SIMD128-NEXT: .functype sext_inreg_i16_to_i64 (v128) -> (i64){{$}}
; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 48{{$}}
; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 48{{$}}
; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i64 @sext_inreg_i16_to_i64(<2 x i64> %x) {
%lane = extractelement <2 x i64> %x, i32 1
%a = shl i64 %lane, 48
%res = ashr i64 %a, 48
ret i64 %res
}
; Sign-extending the low 32 bits of a 64-bit lane is expected to extract the
; whole 64-bit lane with i64x2.extract_lane and then sign-extend it in scalar
; code with an i64.shl / i64.shr_s pair by 32. Without SIMD enabled, no i64x2
; instruction may appear at all.
; CHECK-LABEL: sext_inreg_i32_to_i64:
; NO-SIMD128-NOT: i64x2
; SIMD128-NEXT: .functype sext_inreg_i32_to_i64 (v128) -> (i64){{$}}
; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}}
; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 32{{$}}
; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 32{{$}}
; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i64 @sext_inreg_i32_to_i64(<2 x i64> %x) {
%lane = extractelement <2 x i64> %x, i32 1
%a = shl i64 %lane, 32
%res = ashr i64 %a, 32
ret i64 %res
}

View File

@ -127,7 +127,7 @@ define i32 @extract_undef_v16i8_u(<16 x i8> %v) {
; CHECK-LABEL: extract_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype extract_v16i8 (v128) -> (i32){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 13{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i8 @extract_v16i8(<16 x i8> %v) {
%elem = extractelement <16 x i8> %v, i8 13
@ -155,7 +155,7 @@ define i8 @extract_var_v16i8(<16 x i8> %v, i32 %i) {
; CHECK-LABEL: extract_undef_v16i8:
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype extract_undef_v16i8 (v128) -> (i32){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i8 @extract_undef_v16i8(<16 x i8> %v) {
%elem = extractelement <16 x i8> %v, i8 undef
@ -393,7 +393,7 @@ define i32 @extract_undef_v8i16_u(<8 x i16> %v) {
; CHECK-LABEL: extract_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype extract_v8i16 (v128) -> (i32){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 5{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i16 @extract_v8i16(<8 x i16> %v) {
%elem = extractelement <8 x i16> %v, i16 5
@ -423,7 +423,7 @@ define i16 @extract_var_v8i16(<8 x i16> %v, i32 %i) {
; CHECK-LABEL: extract_undef_v8i16:
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype extract_undef_v8i16 (v128) -> (i32){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
define i16 @extract_undef_v8i16(<8 x i16> %v) {
%elem = extractelement <8 x i16> %v, i16 undef