forked from OSchip/llvm-project
[WebAssembly] Expand SIMD shifts while V8's implementation disagrees
Summary: V8 currently implements SIMD shifts as taking an immediate operand, which disagrees with the spec proposal and the toolchain implementation. As a stopgap measure to get things working, unroll all vector shifts. Since this is a temporary measure, there are no tests. Reviewers: aheejin, dschuff Subscribers: sbc100, jgravelle-google, sunfish, dmgreen, llvm-commits Differential Revision: https://reviews.llvm.org/D56520 llvm-svn: 351151
This commit is contained in:
parent
33eb4d947d
commit
6bf2b40051
|
@ -1155,6 +1155,31 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Unroll a vector shift into per-lane scalar shifts. Lanes narrower than
// 32 bits get their shift amount masked to (lane width - 1) first, so the
// widened 32-bit scalar shifts produced by unrolling keep the wrapping
// semantics of the original vector instruction.
static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // Lanes of i32 or wider already have correct semantics when unrolled.
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Narrow lanes: AND the shift amount with (lane bits - 1) so the
  // out-of-range amounts wrap the same way the vector shift would.
  SDLoc DL(Op);
  SDValue ShiftAmt = Op.getOperand(1);
  uint64_t Mask = LaneT.getSizeInBits() - 1;
  SDValue MaskedAmt =
      DAG.getNode(ISD::AND, DL, ShiftAmt.getValueType(), ShiftAmt,
                  DAG.getConstant(Mask, DL, ShiftAmt.getValueType()));
  // Rebuild the shift with the masked amount, then unroll it lane by lane.
  SDValue MaskedShift = DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
                                    Op.getOperand(0), MaskedAmt);
  return DAG.UnrollVectorOp(MaskedShift.getNode());
}
|
||||
|
||||
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
|
@ -1162,12 +1187,17 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
|
|||
// Only manually lower vector shifts
|
||||
assert(Op.getSimpleValueType().isVector());
|
||||
|
||||
// Expand all vector shifts until V8 fixes its implementation
|
||||
// TODO: remove this once V8 is fixed
|
||||
if (!Subtarget->hasUnimplementedSIMD128())
|
||||
return UnrollVectorShift(Op, DAG);
|
||||
|
||||
// Unroll non-splat vector shifts
|
||||
BuildVectorSDNode *ShiftVec;
|
||||
SDValue SplatVal;
|
||||
if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
|
||||
!(SplatVal = ShiftVec->getSplatValue()))
|
||||
return DAG.UnrollVectorOp(Op.getNode());
|
||||
return UnrollVectorShift(Op, DAG);
|
||||
|
||||
// All splats except i64x2 const splats are handled by patterns
|
||||
ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
|
||||
|
|
|
@ -90,7 +90,11 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
|
|||
; NO-SIMD128-NOT: i8x16
|
||||
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 14 lanes
|
||||
|
@ -122,7 +126,11 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
|
|||
; NO-SIMD128-NOT: i8x16
|
||||
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 14 lanes
|
||||
|
@ -154,7 +162,11 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
|
|||
; NO-SIMD128-NOT: i8x16
|
||||
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 14 lanes
|
||||
|
@ -304,7 +316,11 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
|
|||
; NO-SIMD128-NOT: i16x8
|
||||
; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 6 lanes
|
||||
|
@ -335,7 +351,11 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
|
|||
; NO-SIMD128-NOT: i16x8
|
||||
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 6 lanes
|
||||
|
@ -366,7 +386,11 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
|
|||
; NO-SIMD128-NOT: i16x8
|
||||
; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
|
||||
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
|
||||
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
|
||||
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
|
||||
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
|
||||
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; Skip 6 lanes
|
||||
|
|
Loading…
Reference in New Issue