forked from OSchip/llvm-project
Reland "[WebAssembly] Emulate v128.const efficiently"
This reverts commit 432e4e56d3, which reverted 542523a61a. Two issues from the original commit have been fixed. First, MSVC does not like when std::array is initialized with only single braces, so this commit switches to using the more portable double braces. Second, there was a subtle endianness bug that prevented the original commit from working correctly on big-endian machines, which has been fixed by switching to using endianness-agnostic bit twiddling instead of type punning. Differential Revision: https://reviews.llvm.org/D88773
This commit is contained in:
parent
b3b4cda104
commit
72c628e835
|
@ -31,6 +31,7 @@
|
|||
#include "llvm/IR/IntrinsicsWebAssembly.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
using namespace llvm;
|
||||
|
@ -1565,6 +1566,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
};
|
||||
} else if (NumConstantLanes >= NumSplatLanes &&
|
||||
Subtarget->hasUnimplementedSIMD128()) {
|
||||
// If we support v128.const, emit it directly
|
||||
SmallVector<SDValue, 16> ConstLanes;
|
||||
for (const SDValue &Lane : Op->op_values()) {
|
||||
if (IsConstant(Lane)) {
|
||||
|
@ -1576,11 +1578,59 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
}
|
||||
}
|
||||
Result = DAG.getBuildVector(VecT, DL, ConstLanes);
|
||||
IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
|
||||
IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
|
||||
return IsConstant(Lane);
|
||||
};
|
||||
}
|
||||
if (!Result) {
|
||||
} else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
|
||||
// Otherwise, if this is an integer vector, pack the lane values together so
|
||||
// we can construct the 128-bit constant from a pair of i64s using a splat
|
||||
// followed by at most one i64x2.replace_lane. Also keep track of the lanes
|
||||
// that actually matter so we can avoid the replace_lane in more cases.
|
||||
std::array<uint64_t, 2> I64s{{0, 0}};
|
||||
std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
|
||||
size_t LaneBits = 128 / Lanes;
|
||||
size_t HalfLanes = Lanes / 2;
|
||||
for (size_t I = 0; I < Lanes; ++I) {
|
||||
const SDValue &Lane = Op.getOperand(I);
|
||||
if (IsConstant(Lane)) {
|
||||
// How much we need to shift Val to position it in an i64
|
||||
auto Shift = LaneBits * (I % HalfLanes);
|
||||
auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
|
||||
auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
|
||||
I64s[I / HalfLanes] |= Val << Shift;
|
||||
ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
|
||||
}
|
||||
}
|
||||
// Check whether all constant lanes in the second half of the vector are
|
||||
// equivalent in the first half or vice versa to determine whether splatting
|
||||
// either side will be sufficient to materialize the constant. As a special
|
||||
// case, if the first and second halves have no constant lanes in common, we
|
||||
// can just combine them.
|
||||
bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
|
||||
bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
|
||||
bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
|
||||
|
||||
uint64_t Splatted;
|
||||
if (SecondHalfSufficient) {
|
||||
Splatted = I64s[1];
|
||||
} else if (CombinedSufficient) {
|
||||
Splatted = I64s[0] | I64s[1];
|
||||
} else {
|
||||
Splatted = I64s[0];
|
||||
}
|
||||
|
||||
Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
|
||||
DAG.getConstant(Splatted, DL, MVT::i64));
|
||||
if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
|
||||
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
|
||||
DAG.getConstant(I64s[1], DL, MVT::i64),
|
||||
DAG.getConstant(1, DL, MVT::i32));
|
||||
}
|
||||
Result = DAG.getBitcast(VecT, Result);
|
||||
IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
|
||||
return IsConstant(Lane);
|
||||
};
|
||||
} else {
|
||||
// Use a splat, but possibly a load_splat
|
||||
LoadSDNode *SplattedLoad;
|
||||
if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
|
||||
|
@ -1593,11 +1643,14 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
} else {
|
||||
Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
|
||||
}
|
||||
IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
|
||||
IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
|
||||
return Lane == SplatValue;
|
||||
};
|
||||
}
|
||||
|
||||
assert(Result);
|
||||
assert(IsLaneConstructed);
|
||||
|
||||
// Add replace_lane instructions for any unhandled values
|
||||
for (size_t I = 0; I < Lanes; ++I) {
|
||||
const SDValue &Lane = Op->getOperand(I);
|
||||
|
|
|
@ -8,12 +8,95 @@
|
|||
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
|
||||
target triple = "wasm32-unknown-unknown"
|
||||
|
||||
; CHECK-LABEL: emulated_const_trivial_splat:
|
||||
; CHECK-NEXT: .functype emulated_const_trivial_splat () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
; UNIMP: v128.const
|
||||
define <4 x i32> @emulated_const_trivial_splat() {
|
||||
ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_first_sufficient:
|
||||
; CHECK-NEXT: .functype emulated_const_first_sufficient () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
; UNIMP: v128.const
|
||||
define <4 x i32> @emulated_const_first_sufficient() {
|
||||
ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_second_sufficient:
|
||||
; CHECK-NEXT: .functype emulated_const_second_sufficient () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
; UNIMP: v128.const
|
||||
define <4 x i32> @emulated_const_second_sufficient() {
|
||||
ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_combined_sufficient:
|
||||
; CHECK-NEXT: .functype emulated_const_combined_sufficient () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
; UNIMP: v128.const
|
||||
define <4 x i32> @emulated_const_combined_sufficient() {
|
||||
ret <4 x i32> <i32 1, i32 undef, i32 undef, i32 2>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_either_sufficient:
|
||||
; CHECK-NEXT: .functype emulated_const_either_sufficient () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 1
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
; UNIMP: v128.const
|
||||
define <4 x i32> @emulated_const_either_sufficient() {
|
||||
ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_neither_sufficient:
|
||||
; CHECK-NEXT: .functype emulated_const_neither_sufficient () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: i64.const $push2=, 17179869184
|
||||
; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
|
||||
; SIMD-VM-NEXT: return $pop3
|
||||
define <4 x i32> @emulated_const_neither_sufficient() {
|
||||
ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_combined_sufficient_large:
|
||||
; CHECK-NEXT: .functype emulated_const_combined_sufficient_large () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, 506097522914230528
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: return $pop1
|
||||
define <16 x i8> @emulated_const_combined_sufficient_large() {
|
||||
ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 undef,
|
||||
i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: emulated_const_neither_sufficient_large:
|
||||
; CHECK-NEXT: .functype emulated_const_neither_sufficient_large () -> (v128)
|
||||
; SIMD-VM-NEXT: i64.const $push0=, -70368726997663744
|
||||
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
|
||||
; SIMD-VM-NEXT: i64.const $push2=, 504408655873966336
|
||||
; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
|
||||
; SIMD-VM-NEXT: return $pop3
|
||||
define <16 x i8> @emulated_const_neither_sufficient_large() {
|
||||
ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 255,
|
||||
i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
|
||||
}
|
||||
|
||||
; CHECK-LABEL: same_const_one_replaced_i16x8:
|
||||
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
|
||||
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
|
||||
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
|
||||
; UNIMP-NEXT: return $pop[[L1]]
|
||||
; SIMD-VM: i16x8.splat
|
||||
; SIMD-VM: i64x2.splat
|
||||
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
|
||||
%v = insertelement
|
||||
<8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
|
||||
|
@ -27,7 +110,7 @@ define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
|
|||
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
|
||||
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
|
||||
; UNIMP-NEXT: return $pop[[L1]]
|
||||
; SIMD-VM: i16x8.splat
|
||||
; SIMD-VM: i64x2.splat
|
||||
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
|
||||
%v = insertelement
|
||||
<8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
|
||||
|
@ -68,7 +151,7 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) {
|
|||
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
|
||||
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
|
||||
; UNIMP-NEXT: return $pop[[L0]]
|
||||
; SIMD-VM: i32x4.splat
|
||||
; SIMD-VM: i64x2.splat
|
||||
define <4 x i32> @splat_common_const_i32x4() {
|
||||
ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
|
||||
}
|
||||
|
@ -206,7 +289,7 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla
|
|||
; UNIMP: i8x16.replace_lane
|
||||
; UNIMP: i8x16.replace_lane
|
||||
; UNIMP: return
|
||||
; SIMD-VM: i8x16.splat
|
||||
; SIMD-VM: i64x2.splat
|
||||
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
|
||||
; swizzle 0
|
||||
%m0 = extractelement <16 x i8> %mask, i32 0
|
||||
|
|
Loading…
Reference in New Issue