[WebAssembly] Use v128.const instead of splats for constants

We previously used splats instead of v128.const to materialize vector constants
because V8 did not support v128.const. Now that V8 supports v128.const, we can
use v128.const instead. Although this increases code size, it should also
increase performance (or at least require fewer engine-side optimizations), so
it is an appropriate change to make.

Differential Revision: https://reviews.llvm.org/D100716
This commit is contained in:
Thomas Lively 2021-04-19 12:43:58 -07:00
parent 6c5b0d6bea
commit e657c84fa1
4 changed files with 29 additions and 47 deletions

View File

@@ -1160,12 +1160,15 @@ defm "" : SIMDNarrow<I32x4, 133>;
// Use narrowing operations for truncating stores. Since the narrowing
// operations are saturating instead of truncating, we need to mask
// the stored values first.
// TODO: Use consts instead of splats
def store_v8i8_trunc_v8i16 :
OutPatFrag<(ops node:$val),
(EXTRACT_LANE_I64x2
(NARROW_U_I8x16
(AND (SPLAT_I32x4 (CONST_I32 0x00ff00ff)), node:$val),
(AND
(CONST_V128_I16x8
0x00ff, 0x00ff, 0x00ff, 0x00ff,
0x00ff, 0x00ff, 0x00ff, 0x00ff),
node:$val),
$val), // Unused input
0)>;
@@ -1173,7 +1176,10 @@ def store_v4i16_trunc_v4i32 :
OutPatFrag<(ops node:$val),
(EXTRACT_LANE_I64x2
(NARROW_U_I16x8
(AND (SPLAT_I32x4 (CONST_I32 0x0000ffff)), node:$val),
(AND
(CONST_V128_I32x4
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff),
node:$val),
$val), // Unused input
0)>;

View File

@@ -121,14 +121,9 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
} else if (RegClass == &WebAssembly::V128RegClass) {
// TODO: Replace this with v128.const 0 once that is supported in V8
Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MI->setDesc(TII->get(WebAssembly::SPLAT_I32x4));
MI->addOperand(MachineOperand::CreateReg(TempReg, false));
MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(WebAssembly::CONST_I32), TempReg)
.addImm(0);
LIS.InsertMachineInstrInMaps(*Const);
MI->setDesc(TII->get(WebAssembly::CONST_V128_I64x2));
MI->addOperand(MachineOperand::CreateImm(0));
MI->addOperand(MachineOperand::CreateImm(0));
} else {
llvm_unreachable("Unexpected reg class");
}

View File

@@ -97,10 +97,7 @@ X: ; preds = %0, C
}
; CHECK-LABEL: implicit_def_v4i32:
; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}}
; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}}
; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L0]]
; CHECK: v128.const $push[[R:[0-9]+]]=, 0, 0{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @implicit_def_v4i32() {
br i1 undef, label %A, label %X

View File

@@ -923,8 +923,7 @@ define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -956,8 +955,7 @@ define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p)
; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -990,8 +988,7 @@ define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>*
; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1026,8 +1023,7 @@ define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1064,8 +1060,7 @@ define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1102,8 +1097,7 @@ define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1134,8 +1128,7 @@ define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p)
; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1165,8 +1158,7 @@ define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 16711935
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1753,8 +1745,7 @@ define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1786,8 +1777,7 @@ define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %
; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1820,8 +1810,7 @@ define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16
; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1856,8 +1845,7 @@ define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const -8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1894,8 +1882,7 @@ define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>*
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1932,8 +1919,7 @@ define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1964,8 +1950,7 @@ define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0
@@ -1995,8 +1980,7 @@ define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32.const 0
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32x4.splat
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 0