forked from OSchip/llvm-project
[WebAssembly] Codegen for v128.load{32,64}_zero
Replace the experimental clang builtins and LLVM intrinsics for these instructions with normal instruction selection patterns. The wasm_simd128.h intrinsics header was already using portable code for the corresponding intrinsics, so now it produces the correct instructions. Differential Revision: https://reviews.llvm.org/D106400
This commit is contained in:
parent
e23ff55931
commit
1a57ee1276
|
@ -192,8 +192,5 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
|
|||
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
|
||||
|
||||
#undef BUILTIN
|
||||
#undef TARGET_BUILTIN
|
||||
|
|
|
@ -17967,16 +17967,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
|||
Builder.getInt32(2), Builder.getInt32(3)});
|
||||
return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask);
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_load32_zero: {
|
||||
Value *Ptr = EmitScalarExpr(E->getArg(0));
|
||||
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
|
||||
return Builder.CreateCall(Callee, {Ptr});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_load64_zero: {
|
||||
Value *Ptr = EmitScalarExpr(E->getArg(0));
|
||||
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
|
||||
return Builder.CreateCall(Callee, {Ptr});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
|
||||
Value *Ops[18];
|
||||
size_t OpIdx = 0;
|
||||
|
|
|
@ -836,18 +836,6 @@ u32x4 trunc_sat_zero_u_f64x2_i32x4(f64x2 x) {
|
|||
// WEBASSEMBLY: ret <4 x i32> %1
|
||||
}
|
||||
|
||||
i32x4 load32_zero(const int *p) {
|
||||
return __builtin_wasm_load32_zero(p);
|
||||
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
i64x2 load64_zero(const long long *p) {
|
||||
return __builtin_wasm_load64_zero(p);
|
||||
// WEBASSEMBLY: call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
|
||||
// WEBASSEMBLY: ret
|
||||
}
|
||||
|
||||
i8x16 swizzle_i8x16(i8x16 x, i8x16 y) {
|
||||
return __builtin_wasm_swizzle_i8x16(x, y);
|
||||
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
|
||||
|
|
|
@ -172,20 +172,6 @@ def int_wasm_pmax :
|
|||
[LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
// TODO: Replace these intrinsic with normal ISel patterns once the
|
||||
// load_zero instructions are merged to the proposal.
|
||||
def int_wasm_load32_zero :
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[LLVMPointerType<llvm_i32_ty>],
|
||||
[IntrReadMem, IntrArgMemOnly],
|
||||
"", [SDNPMemOperand]>;
|
||||
|
||||
def int_wasm_load64_zero :
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[LLVMPointerType<llvm_i64_ty>],
|
||||
[IntrReadMem, IntrArgMemOnly],
|
||||
"", [SDNPMemOperand]>;
|
||||
|
||||
// TODO: Replace this intrinsic with normal ISel patterns once popcnt is merged
|
||||
// to the proposal.
|
||||
def int_wasm_popcnt :
|
||||
|
|
|
@ -758,15 +758,6 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
Info.align = Align(8);
|
||||
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
|
||||
return true;
|
||||
case Intrinsic::wasm_load32_zero:
|
||||
case Intrinsic::wasm_load64_zero:
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.align = Align(1);
|
||||
Info.flags = MachineMemOperand::MOLoad;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -264,19 +264,19 @@ multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
|
|||
} // mayLoad = 1, UseNamedOperandTable = 1
|
||||
}
|
||||
|
||||
// TODO: Also support v4f32 and v2f64 once the instructions are merged
|
||||
// to the proposal
|
||||
defm "" : SIMDLoadZero<I32x4, 0x5c>;
|
||||
defm "" : SIMDLoadZero<I64x2, 0x5d>;
|
||||
|
||||
// TODO: f32x4 and f64x2 as well
|
||||
foreach vec = [I32x4, I64x2] in {
|
||||
defvar loadpat = !cast<Intrinsic>("int_wasm_load"#vec.lane_bits#"_zero");
|
||||
defvar inst = "LOAD_ZERO_"#vec;
|
||||
defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
|
||||
defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
|
||||
defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
|
||||
defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
|
||||
defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
|
||||
defvar inst = "LOAD_ZERO_"#vec;
|
||||
defvar pat = PatFrag<(ops node:$ptr),
|
||||
(vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
|
||||
defm : LoadPatNoOffset<vec.vt, pat, inst>;
|
||||
defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
|
||||
defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
|
||||
defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
|
||||
defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
|
||||
}
|
||||
|
||||
// Load lane
|
||||
|
|
|
@ -912,6 +912,56 @@ define void @store_lane_i32_a8(<4 x i32> %v, i32* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define <4 x i32> @load_zero_i32_a1(i32* %p) {
|
||||
; CHECK-LABEL: load_zero_i32_a1:
|
||||
; CHECK: .functype load_zero_i32_a1 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i32, i32* %p, align 1
|
||||
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
define <4 x i32> @load_zero_i32_a2(i32* %p) {
|
||||
; CHECK-LABEL: load_zero_i32_a2:
|
||||
; CHECK: .functype load_zero_i32_a2 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=1
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i32, i32* %p, align 2
|
||||
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
; 4 is the default alignment for v128.load32_zero so no attribute is needed.
|
||||
define <4 x i32> @load_zero_i32_a4(i32* %p) {
|
||||
; CHECK-LABEL: load_zero_i32_a4:
|
||||
; CHECK: .functype load_zero_i32_a4 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i32, i32* %p, align 4
|
||||
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
; 8 is greater than the default alignment so it is ignored.
|
||||
define <4 x i32> @load_zero_i32_a8(i32* %p) {
|
||||
; CHECK-LABEL: load_zero_i32_a8:
|
||||
; CHECK: .functype load_zero_i32_a8 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i32, i32* %p, align 8
|
||||
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
; ==============================================================================
|
||||
; 2 x i64
|
||||
; ==============================================================================
|
||||
|
@ -1213,6 +1263,68 @@ define void @store_lane_i64_a16(<2 x i64> %v, i64* %p) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define <2 x i64> @load_zero_i64_a1(i64* %p) {
|
||||
; CHECK-LABEL: load_zero_i64_a1:
|
||||
; CHECK: .functype load_zero_i64_a1 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i64, i64* %p, align 1
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
define <2 x i64> @load_zero_i64_a2(i64* %p) {
|
||||
; CHECK-LABEL: load_zero_i64_a2:
|
||||
; CHECK: .functype load_zero_i64_a2 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=1
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i64, i64* %p, align 2
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
define <2 x i64> @load_zero_i64_a4(i64* %p) {
|
||||
; CHECK-LABEL: load_zero_i64_a4:
|
||||
; CHECK: .functype load_zero_i64_a4 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=2
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i64, i64* %p, align 4
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
; 8 is the default alignment for v128.load64_zero so no attribute is needed.
|
||||
define <2 x i64> @load_zero_i64_a8(i64* %p) {
|
||||
; CHECK-LABEL: load_zero_i64_a8:
|
||||
; CHECK: .functype load_zero_i64_a8 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i64, i64* %p, align 8
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
; 16 is greater than the default alignment so it is ignored.
|
||||
define <2 x i64> @load_zero_i64_a16(i64* %p) {
|
||||
; CHECK-LABEL: load_zero_i64_a16:
|
||||
; CHECK: .functype load_zero_i64_a16 (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%x = load i64, i64* %p, align 16
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
; ==============================================================================
|
||||
; 4 x float
|
||||
; ==============================================================================
|
||||
|
|
|
@ -5,9 +5,6 @@
|
|||
|
||||
target triple = "wasm32-unknown-unknown"
|
||||
|
||||
declare <4 x i32> @llvm.wasm.load32.zero(i32*)
|
||||
declare <2 x i64> @llvm.wasm.load64.zero(i64*)
|
||||
|
||||
;===----------------------------------------------------------------------------
|
||||
; v128.load32_zero
|
||||
;===----------------------------------------------------------------------------
|
||||
|
@ -17,9 +14,10 @@ define <4 x i32> @load_zero_i32_no_offset(i32* %p) {
|
|||
; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
|
||||
%x = load i32, i32* %p
|
||||
%v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %v
|
||||
}
|
||||
|
||||
|
@ -28,12 +26,13 @@ define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) {
|
|||
; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 24:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 24
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%q = ptrtoint i32* %p to i32
|
||||
%r = add nuw i32 %q, 24
|
||||
%s = inttoptr i32 %r to i32*
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -42,10 +41,11 @@ define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) {
|
|||
; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load32_zero 24:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 24
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr inbounds i32, i32* %p, i32 6
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -56,10 +56,11 @@ define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const -24
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr inbounds i32, i32* %p, i32 -6
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -70,12 +71,13 @@ define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const 24
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%q = ptrtoint i32* %p to i32
|
||||
%r = add nsw i32 %q, 24
|
||||
%s = inttoptr i32 %r to i32*
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -86,10 +88,11 @@ define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const 24
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load32_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr i32, i32* %p, i32 6
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -98,10 +101,11 @@ define <4 x i32> @load_zero_i32_from_numeric_address() {
|
|||
; CHECK: .functype load_zero_i32_from_numeric_address () -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: i32.const 0
|
||||
; CHECK-NEXT: v128.load32_zero 42:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero 42
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = inttoptr i32 42 to i32*
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
|
||||
%x = load i32, i32* %s
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -111,9 +115,10 @@ define <4 x i32> @load_zero_i32_from_global_address() {
|
|||
; CHECK: .functype load_zero_i32_from_global_address () -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: i32.const 0
|
||||
; CHECK-NEXT: v128.load32_zero gv_i32:p2align=0
|
||||
; CHECK-NEXT: v128.load32_zero gv_i32
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32)
|
||||
%x = load i32, i32* @gv_i32
|
||||
%t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
|
||||
ret <4 x i32> %t
|
||||
}
|
||||
|
||||
|
@ -126,9 +131,10 @@ define <2 x i64> @load_zero_i64_no_offset(i64* %p) {
|
|||
; CHECK: .functype load_zero_i64_no_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
|
||||
%x = load i64, i64* %p
|
||||
%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %v
|
||||
}
|
||||
|
||||
|
@ -137,12 +143,13 @@ define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) {
|
|||
; CHECK: .functype load_zero_i64_with_folded_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 24:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 24
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%q = ptrtoint i64* %p to i32
|
||||
%r = add nuw i32 %q, 24
|
||||
%s = inttoptr i32 %r to i64*
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -151,10 +158,11 @@ define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) {
|
|||
; CHECK: .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: v128.load64_zero 48:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 48
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr inbounds i64, i64* %p, i64 6
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -165,10 +173,11 @@ define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const -48
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr inbounds i64, i64* %p, i64 -6
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -179,12 +188,13 @@ define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const 24
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%q = ptrtoint i64* %p to i32
|
||||
%r = add nsw i32 %q, 24
|
||||
%s = inttoptr i32 %r to i64*
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -195,10 +205,11 @@ define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) {
|
|||
; CHECK-NEXT: local.get 0
|
||||
; CHECK-NEXT: i32.const 48
|
||||
; CHECK-NEXT: i32.add
|
||||
; CHECK-NEXT: v128.load64_zero 0:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 0
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = getelementptr i64, i64* %p, i64 6
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -207,10 +218,11 @@ define <2 x i64> @load_zero_i64_from_numeric_address() {
|
|||
; CHECK: .functype load_zero_i64_from_numeric_address () -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: i32.const 0
|
||||
; CHECK-NEXT: v128.load64_zero 42:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero 42
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%s = inttoptr i32 42 to i64*
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
|
||||
%x = load i64, i64* %s
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
||||
|
@ -220,8 +232,9 @@ define <2 x i64> @load_zero_i64_from_global_address() {
|
|||
; CHECK: .functype load_zero_i64_from_global_address () -> (v128)
|
||||
; CHECK-NEXT: # %bb.0:
|
||||
; CHECK-NEXT: i32.const 0
|
||||
; CHECK-NEXT: v128.load64_zero gv_i64:p2align=0
|
||||
; CHECK-NEXT: v128.load64_zero gv_i64
|
||||
; CHECK-NEXT: # fallthrough-return
|
||||
%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64)
|
||||
%x = load i64, i64* @gv_i64
|
||||
%t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
|
||||
ret <2 x i64> %t
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue