forked from OSchip/llvm-project
[WebAssembly] Implement prototype SIMD rounding instructions
Summary: Implements the prototype SIMD rounding instructions (ceil, floor, trunc, nearest) as specified in https://github.com/WebAssembly/simd/pull/232. For now they are exposed as LLVM intrinsics rather than normal ISel patterns so that they remain opt-in. Once the instructions are merged into the spec proposal, the intrinsics will be replaced with proper ISel patterns. Reviewers: aheejin Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D81222
This commit is contained in:
parent
9c2e770034
commit
b7d369280b
|
@ -146,6 +146,15 @@ TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
|
|||
TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_nearest_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_ceil_f64x2, "V2dV2d", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_floor_f64x2, "V2dV2d", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_f64x2, "V2dV2d", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
|
||||
|
|
|
@ -15951,6 +15951,39 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
|||
CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
|
||||
return Builder.CreateCall(Callee, {LHS, RHS});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f64x2:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
|
||||
unsigned IntNo;
|
||||
switch (BuiltinID) {
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
|
||||
IntNo = Intrinsic::wasm_ceil;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_floor_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f64x2:
|
||||
IntNo = Intrinsic::wasm_floor;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
|
||||
IntNo = Intrinsic::wasm_trunc;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f64x2:
|
||||
IntNo = Intrinsic::wasm_nearest;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unexpected builtin ID");
|
||||
}
|
||||
Value *Value = EmitScalarExpr(E->getArg(0));
|
||||
Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
|
||||
return Builder.CreateCall(Callee, Value);
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
|
||||
Value *Src = EmitScalarExpr(E->getArg(0));
|
||||
Value *Indices = EmitScalarExpr(E->getArg(1));
|
||||
|
|
|
@ -621,6 +621,54 @@ f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
|
|||
// WEBASSEMBLY-NEXT: ret
|
||||
}
|
||||
|
||||
f32x4 ceil_f32x4(f32x4 x) {
|
||||
return __builtin_wasm_ceil_f32x4(x);
|
||||
// WEBASSEMBLY: call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f32x4 floor_f32x4(f32x4 x) {
|
||||
return __builtin_wasm_floor_f32x4(x);
|
||||
// WEBASSEMBLY: call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f32x4 trunc_f32x4(f32x4 x) {
|
||||
return __builtin_wasm_trunc_f32x4(x);
|
||||
// WEBASSEMBLY: call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f32x4 nearest_f32x4(f32x4 x) {
|
||||
return __builtin_wasm_nearest_f32x4(x);
|
||||
// WEBASSEMBLY: call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f64x2 ceil_f64x2(f64x2 x) {
|
||||
return __builtin_wasm_ceil_f64x2(x);
|
||||
// WEBASSEMBLY: call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f64x2 floor_f64x2(f64x2 x) {
|
||||
return __builtin_wasm_floor_f64x2(x);
|
||||
// WEBASSEMBLY: call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f64x2 trunc_f64x2(f64x2 x) {
|
||||
return __builtin_wasm_trunc_f64x2(x);
|
||||
// WEBASSEMBLY: call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f64x2 nearest_f64x2(f64x2 x) {
|
||||
return __builtin_wasm_nearest_f64x2(x);
|
||||
// WEBASSEMBLY: call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %x)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
f32x4 sqrt_f32x4(f32x4 x) {
|
||||
return __builtin_wasm_sqrt_f32x4(x);
|
||||
// WEBASSEMBLY: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
|
||||
|
|
|
@ -176,8 +176,7 @@ def int_wasm_widen_high_unsigned :
|
|||
[llvm_anyvector_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
// TODO: Replace these intrinsics with normal ISel patterns once the
|
||||
// pmin/pmax instructions are merged to the spec proposal.
|
||||
// TODO: Replace these intrinsics with normal ISel patterns
|
||||
def int_wasm_pmin :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
|
@ -187,6 +186,26 @@ def int_wasm_pmax :
|
|||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
// TODO: Replace these intrinsics with normal ISel patterns once the
|
||||
// rounding instructions are merged to the proposal
|
||||
// (https://github.com/WebAssembly/simd/pull/232).
|
||||
def int_wasm_ceil :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
def int_wasm_floor :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
def int_wasm_trunc :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
def int_wasm_nearest :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Bulk memory intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -765,6 +765,16 @@ defm NEG : SIMDUnaryFP<fneg, "neg", 225>;
|
|||
// Square root: sqrt
|
||||
defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
|
||||
|
||||
// Rounding: ceil, floor, trunc, nearest
|
||||
defm CEIL : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
|
||||
defm FLOOR : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
|
||||
defm TRUNC: SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
|
||||
defm NEAREST: SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
|
||||
defm CEIL : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
|
||||
defm FLOOR : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
|
||||
defm TRUNC: SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
|
||||
defm NEAREST: SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating-point binary arithmetic
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -521,6 +521,46 @@ define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
|
|||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: ceil_v4f32:
|
||||
; SIMD128-NEXT: .functype ceil_v4f32 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.ceil $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <4 x float> @llvm.wasm.ceil.v4f32(<4 x float>)
|
||||
define <4 x float> @ceil_v4f32(<4 x float> %a) {
|
||||
%v = call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: floor_v4f32:
|
||||
; SIMD128-NEXT: .functype floor_v4f32 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.floor $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <4 x float> @llvm.wasm.floor.v4f32(<4 x float>)
|
||||
define <4 x float> @floor_v4f32(<4 x float> %a) {
|
||||
%v = call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_v4f32:
|
||||
; SIMD128-NEXT: .functype trunc_v4f32 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.trunc $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <4 x float> @llvm.wasm.trunc.v4f32(<4 x float>)
|
||||
define <4 x float> @trunc_v4f32(<4 x float> %a) {
|
||||
%v = call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: nearest_v4f32:
|
||||
; SIMD128-NEXT: .functype nearest_v4f32 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.nearest $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <4 x float> @llvm.wasm.nearest.v4f32(<4 x float>)
|
||||
define <4 x float> @nearest_v4f32(<4 x float> %a) {
|
||||
%v = call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: qfma_v4f32:
|
||||
; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
|
||||
|
@ -580,6 +620,46 @@ define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
|
|||
ret <2 x double> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: ceil_v2f64:
|
||||
; SIMD128-NEXT: .functype ceil_v2f64 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.ceil $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <2 x double> @llvm.wasm.ceil.v2f64(<2 x double>)
|
||||
define <2 x double> @ceil_v2f64(<2 x double> %a) {
|
||||
%v = call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %a)
|
||||
ret <2 x double> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: floor_v2f64:
|
||||
; SIMD128-NEXT: .functype floor_v2f64 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.floor $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <2 x double> @llvm.wasm.floor.v2f64(<2 x double>)
|
||||
define <2 x double> @floor_v2f64(<2 x double> %a) {
|
||||
%v = call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %a)
|
||||
ret <2 x double> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_v2f64:
|
||||
; SIMD128-NEXT: .functype trunc_v2f64 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.trunc $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <2 x double> @llvm.wasm.trunc.v2f64(<2 x double>)
|
||||
define <2 x double> @trunc_v2f64(<2 x double> %a) {
|
||||
%v = call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %a)
|
||||
ret <2 x double> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: nearest_v2f64:
|
||||
; SIMD128-NEXT: .functype nearest_v2f64 (v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.nearest $push[[R:[0-9]+]]=, $0{{$}}
|
||||
; SIMD128-NEXT: return $pop[[R]]{{$}}
|
||||
declare <2 x double> @llvm.wasm.nearest.v2f64(<2 x double>)
|
||||
define <2 x double> @nearest_v2f64(<2 x double> %a) {
|
||||
%v = call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %a)
|
||||
ret <2 x double> %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: qfma_v2f64:
|
||||
; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
|
||||
; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
|
||||
|
|
|
@ -508,6 +508,30 @@ main:
|
|||
# CHECK: i64x2.mul # encoding: [0xfd,0xd5,0x01]
|
||||
i64x2.mul
|
||||
|
||||
# CHECK: f32x4.ceil # encoding: [0xfd,0xd8,0x01]
|
||||
f32x4.ceil
|
||||
|
||||
# CHECK: f32x4.floor # encoding: [0xfd,0xd9,0x01]
|
||||
f32x4.floor
|
||||
|
||||
# CHECK: f32x4.trunc # encoding: [0xfd,0xda,0x01]
|
||||
f32x4.trunc
|
||||
|
||||
# CHECK: f32x4.nearest # encoding: [0xfd,0xdb,0x01]
|
||||
f32x4.nearest
|
||||
|
||||
# CHECK: f64x2.ceil # encoding: [0xfd,0xdc,0x01]
|
||||
f64x2.ceil
|
||||
|
||||
# CHECK: f64x2.floor # encoding: [0xfd,0xdd,0x01]
|
||||
f64x2.floor
|
||||
|
||||
# CHECK: f64x2.trunc # encoding: [0xfd,0xde,0x01]
|
||||
f64x2.trunc
|
||||
|
||||
# CHECK: f64x2.nearest # encoding: [0xfd,0xdf,0x01]
|
||||
f64x2.nearest
|
||||
|
||||
# CHECK: f32x4.abs # encoding: [0xfd,0xe0,0x01]
|
||||
f32x4.abs
|
||||
|
||||
|
|
Loading…
Reference in New Issue