forked from OSchip/llvm-project
[VP] Declaration and docs for vp.select intrinsic
llvm.vp.select extends the regular select instruction with an explicit vector length (%evl). All lanes with indexes at and above %evl are undefined. Lanes below %evl are taken from the first input where the mask is true and from the second input otherwise. Reviewed By: rogfer01 Differential Revision: https://reviews.llvm.org/D105351
This commit is contained in:
parent
e387c8c413
commit
ea2cdbf5e6
|
@ -17745,6 +17745,64 @@ The use of an effective %evl is discouraged for those targets. The function
|
||||||
``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target
|
``TargetTransformInfo::hasActiveVectorLength()`` returns true when the target
|
||||||
has native support for %evl.
|
has native support for %evl.
|
||||||
|
|
||||||
|
.. _int_vp_select:
|
||||||
|
|
||||||
|
'``llvm.vp.select.*``' Intrinsics
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Syntax:
|
||||||
|
"""""""
|
||||||
|
This is an overloaded intrinsic.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>)
|
||||||
|
declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <condition>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <evl>)
|
||||||
|
|
||||||
|
Overview:
|
||||||
|
"""""""""
|
||||||
|
|
||||||
|
The '``llvm.vp.select``' intrinsic is used to choose one value based on a
|
||||||
|
condition vector, without IR-level branching.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
""""""""""
|
||||||
|
|
||||||
|
The first operand is a vector of ``i1`` and indicates the condition. The
|
||||||
|
second operand is the value that is selected where the condition vector is
|
||||||
|
true. The third operand is the value that is selected where the condition
|
||||||
|
vector is false. The vectors must be of the same size. The fourth operand is
|
||||||
|
the explicit vector length.
|
||||||
|
|
||||||
|
#. The optional ``fast-math flags`` marker indicates that the select has one or
|
||||||
|
more :ref:`fast-math flags <fastmath>`. These are optimization hints to
|
||||||
|
enable otherwise unsafe floating-point optimizations. Fast-math flags are
|
||||||
|
only valid for selects that return a floating-point scalar or vector type,
|
||||||
|
or an array (nested to any depth) of floating-point scalar or vector types.
|
||||||
|
|
||||||
|
Semantics:
|
||||||
|
""""""""""
|
||||||
|
|
||||||
|
The intrinsic selects lanes from the second and third operand depending on a
|
||||||
|
condition vector.
|
||||||
|
|
||||||
|
All result lanes at positions greater than or equal to ``%evl`` are undefined.
|
||||||
|
For all lanes below ``%evl`` where the condition vector is true the lane is
|
||||||
|
taken from the second operand. Otherwise, the lane is taken from the third
|
||||||
|
operand.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
""""""""
|
||||||
|
|
||||||
|
.. code-block:: llvm
|
||||||
|
|
||||||
|
%r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false, i32 %evl)
|
||||||
|
|
||||||
|
;;; Expansion.
|
||||||
|
;; Any result is legal on lanes at and above %evl.
|
||||||
|
%also.r = select <4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.. _int_vp_add:
|
.. _int_vp_add:
|
||||||
|
|
||||||
|
|
|
@ -1507,6 +1507,12 @@ let IntrProperties =
|
||||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||||
llvm_i32_ty]>;
|
llvm_i32_ty]>;
|
||||||
}
|
}
|
||||||
|
// Shuffles.
|
||||||
|
def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||||
|
[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||||
|
LLVMMatchType<0>,
|
||||||
|
LLVMMatchType<0>,
|
||||||
|
llvm_i32_ty]>;
|
||||||
|
|
||||||
// Reductions
|
// Reductions
|
||||||
let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
|
let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
|
||||||
|
|
|
@ -333,6 +333,16 @@ HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
|
||||||
|
|
||||||
///// } Reduction
|
///// } Reduction
|
||||||
|
|
||||||
|
///// Shuffles {
|
||||||
|
|
||||||
|
// llvm.vp.select(mask,on_true,on_false,vlen)
|
||||||
|
BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
|
||||||
|
// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4)
|
||||||
|
// END_REGISTER_CASES(vp_select, VP_SELECT)
|
||||||
|
END_REGISTER_VP_INTRINSIC(vp_select)
|
||||||
|
|
||||||
|
///// } Shuffles
|
||||||
|
|
||||||
#undef BEGIN_REGISTER_VP
|
#undef BEGIN_REGISTER_VP
|
||||||
#undef BEGIN_REGISTER_VP_INTRINSIC
|
#undef BEGIN_REGISTER_VP_INTRINSIC
|
||||||
#undef BEGIN_REGISTER_VP_SDNODE
|
#undef BEGIN_REGISTER_VP_SDNODE
|
||||||
|
|
|
@ -482,6 +482,9 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
|
||||||
VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
|
VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case Intrinsic::vp_select:
|
||||||
|
VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
|
||||||
|
break;
|
||||||
case Intrinsic::vp_load:
|
case Intrinsic::vp_load:
|
||||||
VPFunc = Intrinsic::getDeclaration(
|
VPFunc = Intrinsic::getDeclaration(
|
||||||
M, VPID,
|
M, VPID,
|
||||||
|
|
|
@ -68,6 +68,8 @@ protected:
|
||||||
Str << " declare float @llvm.vp.reduce." << ReductionOpcode
|
Str << " declare float @llvm.vp.reduce." << ReductionOpcode
|
||||||
<< ".v8f32(float, <8 x float>, <8 x i1>, i32) ";
|
<< ".v8f32(float, <8 x float>, <8 x i1>, i32) ";
|
||||||
|
|
||||||
|
Str << " declare <8 x i32> @llvm.vp.select.v8i32(<8 x i1>, <8 x i32>, <8 x "
|
||||||
|
"i32>, i32)";
|
||||||
return parseAssemblyString(Str.str(), Err, C);
|
return parseAssemblyString(Str.str(), Err, C);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue