forked from OSchip/llvm-project
[VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests
Depends on D115940 for the `Binary_rv_vr_vv` pattern class, the isel fragment used here for the division patterns. Reviewed By: kaz7. Differential Revision: https://reviews.llvm.org/D116035
This commit is contained in:
parent
2926d6d335
commit
b2cea573c9
|
@ -29,6 +29,16 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
|
|||
IsVLVT<4>
|
||||
]>;
|
||||
|
||||
// BinaryFPOp(x,y,mask,vl)
// Type profile for the floating-point VVP binary nodes: one result and four
// operands (x, y, mask, vl). Constraint indices count the result as 0, so
// x = 1, y = 2, mask = 3 and vl = 4.
|
||||
def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
|
||||
// x has the same type as the result.
SDTCisSameAs<0, 1>,
|
||||
// y has the same type as the result.
SDTCisSameAs<0, 2>,
|
||||
// The result (and therefore x and y) is a floating-point type.
SDTCisFP<0>,
|
||||
// The mask operand has an integer type.
SDTCisInt<3>,
|
||||
// The mask has as many elements as the result.
SDTCisSameNumEltsAs<0, 3>,
|
||||
// Constraint on the vl operand; IsVLVT is defined outside this view.
IsVLVT<4>
|
||||
]>;
|
||||
|
||||
// Binary operator commutative pattern.
|
||||
class vvp_commutative<SDNode RootOp> :
|
||||
PatFrags<
|
||||
|
@ -61,4 +71,11 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
|
|||
def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
|
||||
def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
|
||||
|
||||
// Floating-point VVP binary nodes. Each def binds a VEISD::VVP_* opcode name
// to the SDTFPBinOpVVP type profile.
def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
|
||||
// Commutative fragment built from vvp_fadd via vvp_commutative.
def c_vvp_fadd : vvp_commutative<vvp_fadd>;
|
||||
// No commutative fragment is defined for fsub here.
def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
|
||||
def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
|
||||
// Commutative fragment built from vvp_fmul via vvp_commutative.
def c_vvp_fmul : vvp_commutative<vvp_fmul>;
|
||||
// No commutative fragment is defined for fdiv here.
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
|
||||
|
||||
// } Binary Operators
|
||||
|
|
|
@ -178,3 +178,16 @@ defm : Binary_vr_vv_ShortLong<vvp_sra,
|
|||
defm : Binary_vr_vv_ShortLong<vvp_srl,
|
||||
i64, v256i64, "VSRL",
|
||||
i32, v256i32, "PVSRLLO">;
|
||||
|
||||
// Instruction selection for the FP VVP nodes. Each defm passes the node (or
// its commutative fragment), then the f64 scalar type, vector type and
// instruction name string, then the same triple for f32.
// NOTE(review): the Binary_*_ShortLong multiclasses are defined outside this
// view; the rv / vr / vv parts of their names presumably list the operand
// forms (scalar-vector, vector-scalar, vector-vector) they emit patterns
// for -- confirm against their definitions.
// fadd is matched through its commutative fragment.
defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
|
||||
f64, v256f64, "VFADDD",
|
||||
f32, v256f32, "PVFADDUP">;
|
||||
// fmul is matched through its commutative fragment.
defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
|
||||
f64, v256f64, "VFMULD",
|
||||
f32, v256f32, "PVFMULUP">;
|
||||
// fsub is matched on the plain (non-commutative) node.
defm : Binary_rv_vv_ShortLong<vvp_fsub,
|
||||
f64, v256f64, "VFSUBD",
|
||||
f32, v256f32, "PVFSUBUP">;
|
||||
// fdiv uses the rv_vr_vv multiclass; its f32 instruction name is "VFDIVS"
// rather than a "PV...UP" name like the three operations above.
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
|
||||
f64, v256f64, "VFDIVD",
|
||||
f32, v256f32, "VFDIVS">;
|
||||
|
|
|
@ -53,6 +53,12 @@ ADD_BINARY_VVP_OP_COMPACT(AND)
|
|||
ADD_BINARY_VVP_OP_COMPACT(OR)
|
||||
ADD_BINARY_VVP_OP_COMPACT(XOR)
|
||||
|
||||
// FP arithmetic.
// Registers FADD, FSUB, FMUL and FDIV through ADD_BINARY_VVP_OP_COMPACT.
// NOTE(review): the macro is defined outside this view; presumably it derives
// the VVP opcode name from the short name given here -- confirm.
|
||||
ADD_BINARY_VVP_OP_COMPACT(FADD)
|
||||
ADD_BINARY_VVP_OP_COMPACT(FSUB)
|
||||
ADD_BINARY_VVP_OP_COMPACT(FMUL)
|
||||
ADD_BINARY_VVP_OP_COMPACT(FDIV)
|
||||
|
||||
#undef ADD_BINARY_VVP_OP
|
||||
#undef ADD_BINARY_VVP_OP_COMPACT
|
||||
#undef ADD_VVP_OP
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
|
||||
|
||||
declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fadd are <256 x float>
; arguments. The checks expect `lvl` followed by a single `pvfadd.up` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f32_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the float argument %s0 is splatted (insertelement plus
; an all-zero shufflevector mask) to form the left operand. The checks expect
; the scalar register as the first source operand of `pvfadd.up`, with no
; separate broadcast instruction.
define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f32_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x float> undef, float %s0, i32 0
|
||||
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the float argument %s1 is splatted to form the right
; operand. The checks expect the same scalar-first instruction as the rv test
; (`pvfadd.up %v0, %s0, %v0, %vm1`), i.e. the emitted operands are swapped
; relative to the IR call.
define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f32_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x float> undef, float %s1, i32 0
|
||||
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
|
||||
declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fadd are <256 x double>
; arguments. The checks expect `lvl` followed by a single `vfadd.d` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f64_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the double argument %s0 is splatted (insertelement plus
; an all-zero shufflevector mask) to form the left operand. The checks expect
; the scalar register as the first source operand of `vfadd.d`, with no
; separate broadcast instruction.
define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f64_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x double> undef, double %s0, i32 0
|
||||
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the double argument %s1 is splatted to form the right
; operand. The checks expect the same scalar-first instruction as the rv test
; (`vfadd.d %v0, %s0, %v0, %vm1`), i.e. the emitted operands are swapped
; relative to the IR call.
define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fadd_v256f64_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x double> undef, double %s1, i32 0
|
||||
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
|
||||
|
||||
declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fdiv are <256 x float>
; arguments. The checks expect `lvl` followed by a single `vfdiv.s` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f32_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the float argument %s0 is splatted to form the
; dividend. The checks expect the scalar register as the first source operand
; of `vfdiv.s`, with no separate broadcast instruction.
define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f32_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x float> undef, float %s0, i32 0
|
||||
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the float argument %s1 is splatted to form the divisor.
; The checks expect the scalar register as the second source operand of
; `vfdiv.s` (operand order is kept), with no separate broadcast instruction.
define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f32_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x float> undef, float %s1, i32 0
|
||||
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
|
||||
declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fdiv are <256 x double>
; arguments. The checks expect `lvl` followed by a single `vfdiv.d` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f64_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the double argument %s0 is splatted to form the
; dividend. The checks expect the scalar register as the first source operand
; of `vfdiv.d`, with no separate broadcast instruction.
define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f64_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x double> undef, double %s0, i32 0
|
||||
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the double argument %s1 is splatted to form the
; divisor. The checks expect the scalar register as the second source operand
; of `vfdiv.d` (operand order is kept), with no separate broadcast
; instruction.
define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fdiv_v256f64_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x double> undef, double %s1, i32 0
|
||||
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
|
||||
|
||||
declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fmul are <256 x float>
; arguments. The checks expect `lvl` followed by a single `pvfmul.up` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f32_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the float argument %s0 is splatted (insertelement plus
; an all-zero shufflevector mask) to form the left operand. The checks expect
; the scalar register as the first source operand of `pvfmul.up`, with no
; separate broadcast instruction.
define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f32_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x float> undef, float %s0, i32 0
|
||||
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the float argument %s1 is splatted to form the right
; operand. The checks expect the same scalar-first instruction as the rv test
; (`pvfmul.up %v0, %s0, %v0, %vm1`), i.e. the emitted operands are swapped
; relative to the IR call.
define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f32_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x float> undef, float %s1, i32 0
|
||||
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
|
||||
declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fmul are <256 x double>
; arguments. The checks expect `lvl` followed by a single `vfmul.d` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f64_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the double argument %s0 is splatted (insertelement plus
; an all-zero shufflevector mask) to form the left operand. The checks expect
; the scalar register as the first source operand of `vfmul.d`, with no
; separate broadcast instruction.
define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f64_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x double> undef, double %s0, i32 0
|
||||
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the double argument %s1 is splatted to form the right
; operand. The checks expect the same scalar-first instruction as the rv test
; (`vfmul.d %v0, %s0, %v0, %vm1`), i.e. the emitted operands are swapped
; relative to the IR call.
define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fmul_v256f64_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x double> undef, double %s1, i32 0
|
||||
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
|
||||
|
||||
declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fsub are <256 x float>
; arguments. The checks expect `lvl` followed by a single `pvfsub.up` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f32_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the float argument %s0 is splatted to form the
; minuend. The checks expect the scalar register as the first source operand
; of `pvfsub.up`, with no separate broadcast instruction.
define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f32_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x float> undef, float %s0, i32 0
|
||||
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the float argument %s1 is splatted to form the
; subtrahend. Unlike the rv test, the checks expect an explicit broadcast:
; `lea %s2, 256` / `lvl %s2` / `vbrd %v1, %s0` materialises the splat in %v1,
; then `lvl %s1` is issued before a vector-vector `pvfsub.up`.
define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f32_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea %s2, 256
|
||||
; CHECK-NEXT: lvl %s2
|
||||
; CHECK-NEXT: vbrd %v1, %s0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x float> undef, float %s1, i32 0
|
||||
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x float> %r0
|
||||
}
|
||||
|
||||
|
||||
declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
|
||||
|
||||
; Vector-vector case: both operands of llvm.vp.fsub are <256 x double>
; arguments. The checks expect `lvl` followed by a single `vfsub.d` on
; %v0 and %v1 with mask register %vm1.
define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f64_vv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Scalar-vector case: the double argument %s0 is splatted to form the
; minuend. The checks expect the scalar register as the first source operand
; of `vfsub.d`, with no separate broadcast instruction.
define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f64_rv:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x double> undef, double %s0, i32 0
|
||||
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
||||
|
||||
; Vector-scalar case: the double argument %s1 is splatted to form the
; subtrahend. Unlike the rv test, the checks expect an explicit broadcast:
; `lea %s2, 256` / `lvl %s2` / `vbrd %v1, %s0` materialises the splat in %v1,
; then `lvl %s1` is issued before a vector-vector `vfsub.d`.
define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
|
||||
; CHECK-LABEL: test_vp_fsub_v256f64_vr:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea %s2, 256
|
||||
; CHECK-NEXT: lvl %s2
|
||||
; CHECK-NEXT: vbrd %v1, %s0
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x double> undef, double %s1, i32 0
|
||||
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
|
||||
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
|
||||
ret <256 x double> %r0
|
||||
}
|
Loading…
Reference in New Issue