[WebAssembly] Expand SIMD shifts while V8's implementation disagrees

Summary:
V8 currently implements SIMD shifts as taking an immediate operation,
which disagrees with the spec proposal and the toolchain
implementation. As a stopgap measure to get things working, unroll all
vector shifts. Since this is a temporary measure, there are no tests.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, sunfish, dmgreen, llvm-commits

Differential Revision: https://reviews.llvm.org/D56520

llvm-svn: 351151
This commit is contained in:
Thomas Lively 2019-01-15 02:16:03 +00:00
parent 33eb4d947d
commit 6bf2b40051
2 changed files with 61 additions and 7 deletions

View File

@ -1155,6 +1155,31 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
return SDValue();
}
static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
EVT LaneT = Op.getSimpleValueType().getVectorElementType();
// 32-bit and 64-bit unrolled shifts will have proper semantics
if (LaneT.bitsGE(MVT::i32))
return DAG.UnrollVectorOp(Op.getNode());
// Otherwise mask the shift value to get proper semantics from 32-bit shift
SDLoc DL(Op);
SDValue ShiftVal = Op.getOperand(1);
uint64_t MaskVal = LaneT.getSizeInBits() - 1;
SDValue MaskedShiftVal = DAG.getNode(
ISD::AND, // mask opcode
DL, ShiftVal.getValueType(), // masked value type
ShiftVal, // original shift value operand
DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
);
return DAG.UnrollVectorOp(
DAG.getNode(Op.getOpcode(), // original shift opcode
DL, Op.getValueType(), // original return type
Op.getOperand(0), // original vector operand,
MaskedShiftVal // new masked shift value operand
)
.getNode());
}
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@ -1162,12 +1187,17 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
// Expand all vector shifts until V8 fixes its implementation
// TODO: remove this once V8 is fixed
if (!Subtarget->hasUnimplementedSIMD128())
return UnrollVectorShift(Op, DAG);
// Unroll non-splat vector shifts
BuildVectorSDNode *ShiftVec;
SDValue SplatVal;
if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
!(SplatVal = ShiftVec->getSplatValue()))
return DAG.UnrollVectorOp(Op.getNode());
return UnrollVectorShift(Op, DAG);
// All splats except i64x2 const splats are handled by patterns
ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);

View File

@ -90,7 +90,11 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
@ -122,7 +126,11 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
@ -154,7 +162,11 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-NOT: i8x16
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 14 lanes
@ -304,7 +316,11 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes
@ -335,7 +351,11 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes
@ -366,7 +386,11 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-NOT: i16x8
; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; Skip 6 lanes