[VE] select|vp.merge|vp.select v256 isel and tests
Use the `VMRG` instruction for all three operations for now. `vp_select` will be used in passthru patterns.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D117206
parent d97fb55ff3, commit 95bf5ac8a8
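For context (not part of this commit): the three IR forms that now share the VVP_SELECT path look roughly like the sketch below. The function name, value names, and element type are illustrative and not taken from the patch; the vp intrinsic signatures are the standard llvm.vp.select/llvm.vp.merge declarations. All three forms end up as a masked vmrg, as the tests added below check.

declare <256 x double> @llvm.vp.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
declare <256 x double> @llvm.vp.merge.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)

define <256 x double> @vvp_select_forms(<256 x i1> %m, <256 x double> %a, <256 x double> %b, i32 %evl) {
  ; Plain vector select: %m picks per lane between %a and %b.
  %s0 = select <256 x i1> %m, <256 x double> %a, <256 x double> %b
  ; vp.select: the same per-lane choice, under an explicit vector length %evl.
  %s1 = call <256 x double> @llvm.vp.select.v256f64(<256 x i1> %m, <256 x double> %a, <256 x double> %b, i32 %evl)
  ; vp.merge: like vp.select, but lanes at or beyond %evl take the corresponding
  ; lane of the false operand.
  %s2 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %s0, <256 x double> %s1, i32 %evl)
  ret <256 x double> %s2
}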
@@ -1720,7 +1720,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 
-#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
     return lowerToVVP(Op, DAG);
   }
@@ -2729,6 +2729,11 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
     assert(LegalVecVT.isSimple());
     return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
                        Op->getOperand(1), Mask, AVL);
+  } else if (VVPOpcode == VEISD::VVP_SELECT) {
+    auto Mask = Op->getOperand(0);
+    auto OnTrue = Op->getOperand(1);
+    auto OnFalse = Op->getOperand(2);
+    return DAG.getNode(VVPOpcode, DL, LegalVecVT, OnTrue, OnFalse, Mask, AVL);
   }
   llvm_unreachable("lowerToVVP called for unexpected SDNode.");
 }
@@ -39,6 +39,15 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
   IsVLVT<4>
 ]>;
 
+// Select(OnTrue, OnFalse, SelMask, vl)
+def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
+  SDTCisVec<0>,
+  SDTCisSameNumEltsAs<0, 3>,
+  SDTCisSameAs<0, 1>,
+  SDTCisSameAs<1, 2>,
+  IsVLVT<4>
+]>;
+
 // Binary operator commutative pattern.
 class vvp_commutative<SDNode RootOp> :
   PatFrags<
@@ -79,3 +88,5 @@ def c_vvp_fmul : vvp_commutative<vvp_fmul>;
 def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
 
 // } Binary Operators
+
+def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
@@ -191,3 +191,35 @@ defm : Binary_rv_vv_ShortLong<vvp_fsub,
 defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
                                  f64, v256f64, "VFDIVD",
                                  f32, v256f32, "VFDIVS">;
+
+multiclass Merge_mvv<
+    SDPatternOperator OpNode,
+    ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  // Masked.
+  def : Pat<(OpNode
+                DataVT:$vtrue, DataVT:$vfalse,
+                MaskVT:$vm,
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvml_v")
+                $vfalse, $vtrue, $vm, $avl, $vfalse)>;
+}
+
+multiclass Merge_mvv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongDataVT, ValueType ShortDataVT,
+    string OpBaseName> {
+  defm : Merge_mvv<OpNode,
+                   LongDataVT, v256i1,
+                   OpBaseName>;
+  defm : Merge_mvv<OpNode,
+                   ShortDataVT, v256i1,
+                   OpBaseName>;
+}
+
+defm : Merge_mvv_ShortLong<vvp_select,
+                           v256f64,
+                           v256f32, "VMRG">;
+defm : Merge_mvv_ShortLong<vvp_select,
+                           v256i64,
+                           v256i32, "VMRG">;
@@ -59,6 +59,11 @@ ADD_BINARY_VVP_OP_COMPACT(FSUB)
 ADD_BINARY_VVP_OP_COMPACT(FMUL)
 ADD_BINARY_VVP_OP_COMPACT(FDIV)
 
+// Shuffles.
+ADD_VVP_OP(VVP_SELECT,VSELECT)
+HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
+HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
+
 #undef ADD_BINARY_VVP_OP
 #undef ADD_BINARY_VVP_OP_COMPACT
 #undef ADD_VVP_OP
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vec.select.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vec_select_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vec_select_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vec.select.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vec_select_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vec_select_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vec.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vec_select_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vec_select_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vec.select.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vec_select_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vec_select_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m) {
+; CHECK-LABEL: test_vec_select_v256i64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
+  ret <256 x i64> %r0
+}
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.merge.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vp_merge_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_merge_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vp.merge.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vp_merge_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_merge_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vp.merge.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vp_merge_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_merge_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vp.merge.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vp_merge_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_merge_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_merge_v256i64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.select.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
+
+define fastcc <256 x i32> @test_vp_select_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.select.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_select_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.select.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
+  ret <256 x i32> %r0
+}
+
+declare <256 x float> @llvm.vp.select.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
+
+define fastcc <256 x float> @test_vp_select_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x float> @llvm.vp.select.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+define fastcc <256 x float> @test_vp_select_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f32_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x float> undef, float %s1, i32 0
+  %i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x float> @llvm.vp.select.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
+  ret <256 x float> %r0
+}
+
+declare <256 x double> @llvm.vp.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
+
+define fastcc <256 x double> @test_vp_select_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x double> @llvm.vp.select.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+define fastcc <256 x double> @test_vp_select_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256f64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x double> undef, double %s1, i32 0
+  %i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x double> @llvm.vp.select.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
+  ret <256 x double> %r0
+}
+
+declare <256 x i64> @llvm.vp.select.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
+
+define fastcc <256 x i64> @test_vp_select_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.select.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_select_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
+; CHECK-LABEL: test_vp_select_v256i64_vr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea %s2, 256
+; CHECK-NEXT: lvl %s2
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.select.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
+  ret <256 x i64> %r0
+}