forked from OSchip/llvm-project
[VP] Add more cast VPintrinsic and docs.
Add vp.fptoui, vp.uitofp, vp.fptrunc, vp.fpext, vp.trunc, vp.zext, vp.sext, vp.ptrtoint, vp.inttoptr intrinsic and docs. Reviewed By: frasercrmck, craig.topper Differential Revision: https://reviews.llvm.org/D122291
This commit is contained in:
parent
4c2b57ae48
commit
a7c0b7504c
|
@ -20176,6 +20176,332 @@ Examples:
|
|||
|
||||
call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %mask)
|
||||
|
||||
|
||||
.. _int_vp_trunc:
|
||||
|
||||
'``llvm.vp.trunc.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i16> @llvm.vp.trunc.v16i16.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i16> @llvm.vp.trunc.nxv4i16.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.trunc``' intrinsic truncates its first operand to the return
|
||||
type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.trunc``' intrinsic takes a value to cast as its first operand.
|
||||
The return type is the type to cast the value to. Both types must be vectors of
|
||||
:ref:`integer <t_integer>` type. The bit size of the value must be larger than
|
||||
the bit size of the return type. The second operand is the vector mask. The
|
||||
return type, the value to cast, and the vector mask have the same number of
|
||||
elements. The third operand is the explicit vector length of the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.trunc``' intrinsic truncates the high order bits in value and
|
||||
converts the remaining bits to return type. Since the source size must be larger
|
||||
than the destination size, '``llvm.vp.trunc``' cannot be a *no-op cast*. It will
|
||||
always truncate bits. The conversion is performed on lane positions below the
|
||||
explicit vector length and where the vector mask is true. Masked-off lanes are
|
||||
undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i16> @llvm.vp.trunc.v4i16.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = trunc <4 x i32> %a to <4 x i16>
|
||||
%also.r = select <4 x i1> %mask, <4 x i16> %t, <4 x i16> undef
|
||||
|
||||
|
||||
.. _int_vp_zext:
|
||||
|
||||
'``llvm.vp.zext.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i32> @llvm.vp.zext.v16i32.v16i16 (<16 x i16> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i32> @llvm.vp.zext.nxv4i32.nxv4i16 (<vscale x 4 x i16> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.zext``' intrinsic zero extends its first operand to the return
|
||||
type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.zext``' intrinsic takes a value to cast as its first operand.
|
||||
The return type is the type to cast the value to. Both types must be vectors of
|
||||
:ref:`integer <t_integer>` type. The bit size of the value must be smaller than
|
||||
the bit size of the return type. The second operand is the vector mask. The
|
||||
return type, the value to cast, and the vector mask have the same number of
|
||||
elements. The third operand is the explicit vector length of the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.zext``' intrinsic fills the high order bits of the value with zero
|
||||
bits until it reaches the size of the return type. When zero extending from i1,
|
||||
the result will always be either 0 or 1. The conversion is performed on lane
|
||||
positions below the explicit vector length and where the vector mask is true.
|
||||
Masked-off lanes are undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = zext <4 x i16> %a to <4 x i32>
|
||||
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
|
||||
|
||||
|
||||
.. _int_vp_sext:
|
||||
|
||||
'``llvm.vp.sext.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i32> @llvm.vp.sext.v16i32.v16i16 (<16 x i16> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16 (<vscale x 4 x i16> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.sext``' intrinsic sign extends its first operand to the return
|
||||
type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.sext``' intrinsic takes a value to cast as its first operand.
|
||||
The return type is the type to cast the value to. Both types must be vectors of
|
||||
:ref:`integer <t_integer>` type. The bit size of the value must be smaller than
|
||||
the bit size of the return type. The second operand is the vector mask. The
|
||||
return type, the value to cast, and the vector mask have the same number of
|
||||
elements. The third operand is the explicit vector length of the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.sext``' intrinsic performs a sign extension by copying the sign
|
||||
bit (highest order bit) of the value until it reaches the size of the return
|
||||
type. When sign extending from i1, the result will always be either -1 or 0.
|
||||
The conversion is performed on lane positions below the explicit vector length
|
||||
and where the vector mask is true. Masked-off lanes are undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = sext <4 x i16> %a to <4 x i32>
|
||||
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
|
||||
|
||||
|
||||
.. _int_vp_fptrunc:
|
||||
|
||||
'``llvm.vp.fptrunc.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x float> @llvm.vp.fptrunc.v16f32.v16f64 (<16 x double> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x float> @llvm.vp.fptrunc.nxv4f32.nxv4f64 (<vscale x 4 x double> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.fptrunc``' intrinsic truncates its first operand to the return
|
||||
type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fptrunc``' intrinsic takes a value to cast as its first operand.
|
||||
The return type is the type to cast the value to. Both types must be vectors of
|
||||
:ref:`floating-point <t_floating>` type. The bit size of the value must be
|
||||
larger than the bit size of the return type. This implies that
|
||||
'``llvm.vp.fptrunc``' cannot be used to make a *no-op cast*. The second operand
|
||||
is the vector mask. The return type, the value to cast, and the vector mask have
|
||||
the same number of elements. The third operand is the explicit vector length of
|
||||
the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fptrunc``' intrinsic casts a ``value`` from a larger
|
||||
:ref:`floating-point <t_floating>` type to a smaller :ref:`floating-point
|
||||
<t_floating>` type.
|
||||
This intrinsic is assumed to execute in the default :ref:`floating-point
|
||||
environment <floatenv>`. The conversion is performed on lane positions below the
|
||||
explicit vector length and where the vector mask is true. Masked-off lanes are
|
||||
undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x float> @llvm.vp.fptrunc.v4f32.v4f64(<4 x double> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = fptrunc <4 x double> %a to <4 x float>
|
||||
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
|
||||
|
||||
|
||||
.. _int_vp_fpext:
|
||||
|
||||
'``llvm.vp.fpext.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x double> @llvm.vp.fpext.v16f64.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.fpext``' intrinsic extends its first operand to the return
|
||||
type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fpext``' intrinsic takes a value to cast as its first operand.
|
||||
The return type is the type to cast the value to. Both types must be vectors of
|
||||
:ref:`floating-point <t_floating>` type. The bit size of the value must be
|
||||
smaller than the bit size of the return type. This implies that
|
||||
'``llvm.vp.fpext``' cannot be used to make a *no-op cast*. The second operand
|
||||
is the vector mask. The return type, the value to cast, and the vector mask have
|
||||
the same number of elements. The third operand is the explicit vector length of
|
||||
the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fpext``' intrinsic extends the ``value`` from a smaller
|
||||
:ref:`floating-point <t_floating>` type to a larger :ref:`floating-point
|
||||
<t_floating>` type. The '``llvm.vp.fpext``' cannot be used to make a
|
||||
*no-op cast* because it always changes bits. Use ``bitcast`` to make a
|
||||
*no-op cast* for a floating-point cast.
|
||||
The conversion is performed on lane positions below the explicit vector length
|
||||
and where the vector mask is true. Masked-off lanes are undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x double> @llvm.vp.fpext.v4f64.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = fpext <4 x float> %a to <4 x double>
|
||||
%also.r = select <4 x i1> %mask, <4 x double> %t, <4 x double> undef
|
||||
|
||||
|
||||
.. _int_vp_fptoui:
|
||||
|
||||
'``llvm.vp.fptoui.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i32> @llvm.vp.fptoui.v16i32.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i32> @llvm.vp.fptoui.nxv4i32.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
declare <256 x i64> @llvm.vp.fptoui.v256i64.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.fptoui``' intrinsic converts the :ref:`floating-point
|
||||
<t_floating>` operand to the unsigned integer return type.
|
||||
The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fptoui``' intrinsic takes a value to cast as its first operand.
|
||||
The value to cast must be a vector of :ref:`floating-point <t_floating>` type.
|
||||
The return type is the type to cast the value to. The return type must be a
|
||||
vector of :ref:`integer <t_integer>` type. The second operand is the vector
|
||||
mask. The return type, the value to cast, and the vector mask have the same
|
||||
number of elements. The third operand is the explicit vector length of the
|
||||
operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.fptoui``' intrinsic converts its :ref:`floating-point
|
||||
<t_floating>` operand into the nearest (rounding towards zero) unsigned integer
|
||||
value where the lane position is below the explicit vector length and the
|
||||
vector mask is true. Masked-off lanes are undefined. On enabled lanes where
|
||||
conversion takes place and the value cannot fit in the return type, the result
|
||||
on that lane is a :ref:`poison value <poisonvalues>`.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = fptoui <4 x float> %a to <4 x i32>
|
||||
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
|
||||
|
||||
|
||||
.. _int_vp_fptosi:
|
||||
|
||||
'``llvm.vp.fptosi.*``' Intrinsics
|
||||
|
@ -20231,6 +20557,63 @@ Examples:
|
|||
%t = fptosi <4 x float> %a to <4 x i32>
|
||||
%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
|
||||
|
||||
|
||||
.. _int_vp_uitofp:
|
||||
|
||||
'``llvm.vp.uitofp.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x float> @llvm.vp.uitofp.v16f32.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x float> @llvm.vp.uitofp.nxv4f32.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
declare <256 x double> @llvm.vp.uitofp.v256f64.v256i64 (<256 x i64> <op>, <256 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.uitofp``' intrinsic converts its unsigned integer operand to the
|
||||
:ref:`floating-point <t_floating>` return type. The operation has a mask and
|
||||
an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.uitofp``' intrinsic takes a value to cast as its first operand.
|
||||
The value to cast must be a vector of :ref:`integer <t_integer>` type. The
|
||||
return type is the type to cast the value to. The return type must be a vector
|
||||
of :ref:`floating-point <t_floating>` type. The second operand is the vector
|
||||
mask. The return type, the value to cast, and the vector mask have the same
|
||||
number of elements. The third operand is the explicit vector length of the
|
||||
operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.uitofp``' intrinsic interprets its first operand as an unsigned
|
||||
integer quantity and converts it to the corresponding floating-point value. If
|
||||
the value cannot be exactly represented, it is rounded using the default
|
||||
rounding mode. The conversion is performed on lane positions below the
|
||||
explicit vector length and where the vector mask is true. Masked-off lanes are
|
||||
undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = uitofp <4 x i32> %a to <4 x float>
|
||||
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
|
||||
|
||||
|
||||
.. _int_vp_sitofp:
|
||||
|
||||
'``llvm.vp.sitofp.*``' Intrinsics
|
||||
|
@ -20287,6 +20670,118 @@ Examples:
|
|||
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
|
||||
|
||||
|
||||
.. _int_vp_ptrtoint:
|
||||
|
||||
'``llvm.vp.ptrtoint.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i8> @llvm.vp.ptrtoint.v16i8.v16p0i32 (<16 x i32*> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i8> @llvm.vp.ptrtoint.nxv4i8.nxv4p0i32 (<vscale x 4 x i32*> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
declare <256 x i64> @llvm.vp.ptrtoint.v256i64.v256p0i32 (<256 x i32*> <op>, <256 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.ptrtoint``' intrinsic converts its pointer operand to the integer
|
||||
return type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.ptrtoint``' intrinsic takes a value to cast as its first operand,
|
||||
which must be a vector of pointers. The return type is the type to cast the
|
||||
value to, which must be a vector of :ref:`integer <t_integer>` type.
|
||||
The second operand is the vector mask. The return type, the value to cast, and
|
||||
the vector mask have the same number of elements.
|
||||
The third operand is the explicit vector length of the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.ptrtoint``' intrinsic converts value to return type by
|
||||
interpreting the pointer value as an integer and either truncating or zero
|
||||
extending that value to the size of the integer type.
|
||||
If ``value`` is smaller than return type, then a zero extension is done. If
|
||||
``value`` is larger than return type, then a truncation is done. If they are
|
||||
the same size, then nothing is done (*no-op cast*) other than a type
|
||||
change.
|
||||
The conversion is performed on lane positions below the explicit vector length
|
||||
and where the vector mask is true. Masked-off lanes are undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i8> @llvm.vp.ptrtoint.v4i8.v4p0i32(<4 x i32*> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = ptrtoint <4 x i32*> %a to <4 x i8>
|
||||
%also.r = select <4 x i1> %mask, <4 x i8> %t, <4 x i8> undef
|
||||
|
||||
|
||||
.. _int_vp_inttoptr:
|
||||
|
||||
'``llvm.vp.inttoptr.*``' Intrinsics
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <16 x i32*> @llvm.vp.inttoptr.v16p0i32.v16i32 (<16 x i32> <op>, <16 x i1> <mask>, i32 <vector_length>)
|
||||
declare <vscale x 4 x i32*> @llvm.vp.inttoptr.nxv4p0i32.nxv4i32 (<vscale x 4 x i32> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
|
||||
declare <256 x i32*> @llvm.vp.inttoptr.v256p0i32.v256i32 (<256 x i32> <op>, <256 x i1> <mask>, i32 <vector_length>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.vp.inttoptr``' intrinsic converts its integer value to the pointer
|
||||
return type. The operation has a mask and an explicit vector length parameter.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.inttoptr``' intrinsic takes a value to cast as its first operand,
|
||||
which must be a vector of :ref:`integer <t_integer>` type. The return type is
|
||||
the type to cast the value to, which must be a vector of pointers.
|
||||
The second operand is the vector mask. The return type, the value to cast, and
|
||||
the vector mask have the same number of elements.
|
||||
The third operand is the explicit vector length of the operation.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The '``llvm.vp.inttoptr``' intrinsic converts ``value`` to return type by
|
||||
applying either a zero extension or a truncation depending on the size of the
|
||||
integer ``value``. If ``value`` is larger than the size of a pointer, then a
|
||||
truncation is done. If ``value`` is smaller than the size of a pointer, then a
|
||||
zero extension is done. If they are the same size, nothing is done (*no-op cast*).
|
||||
The conversion is performed on lane positions below the explicit vector length
|
||||
and where the vector mask is true. Masked-off lanes are undefined.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%r = call <4 x i32*> @llvm.vp.inttoptr.v4p0i32.v4i32(<4 x i32> %a, <4 x i1> %mask, i32 %evl)
|
||||
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
|
||||
|
||||
%t = inttoptr <4 x i32> %a to <4 x i32*>
|
||||
%also.r = select <4 x i1> %mask, <4 x i32*> %t, <4 x i32*> undef
|
||||
|
||||
|
||||
.. _int_vp_fcmp:
|
||||
|
||||
'``llvm.vp.fcmp.*``' Intrinsics
|
||||
|
|
|
@ -1545,14 +1545,50 @@ let IntrProperties =
|
|||
}
|
||||
|
||||
// Casts.
|
||||
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
[ llvm_anyvector_ty,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_i32_ty]>;
|
||||
|
||||
// Shuffles.
|
||||
def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
|
||||
|
|
|
@ -238,24 +238,65 @@ END_REGISTER_VP(vp_fma, VP_FMA)
|
|||
///// Type Casts {
|
||||
// Specialized helper macro for type conversions.
|
||||
// <operation>(%x, %mask, %evl).
|
||||
#ifdef HELPER_REGISTER_CAST_VP
|
||||
#ifdef HELPER_REGISTER_FP_CAST_VP
|
||||
#error \
|
||||
"The internal helper macro HELPER_REGISTER_CAST_VP is already defined!"
|
||||
"The internal helper macro HELPER_REGISTER_FP_CAST_VP is already defined!"
|
||||
#endif
|
||||
#define HELPER_REGISTER_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \
|
||||
#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \
|
||||
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
|
||||
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
|
||||
VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
|
||||
VP_PROPERTY_CASTOP \
|
||||
VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
|
||||
VP_PROPERTY_CASTOP \
|
||||
END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
|
||||
|
||||
// llvm.vp.fptoui(x,mask,vlen)
|
||||
HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FPTOUI, FPToUI, 0)
|
||||
|
||||
// llvm.vp.fptosi(x,mask,vlen)
|
||||
HELPER_REGISTER_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0)
|
||||
HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0)
|
||||
|
||||
// llvm.vp.uitofp(x,mask,vlen)
|
||||
HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UITOFP, UIToFP, 1)
|
||||
|
||||
// llvm.vp.sitofp(x,mask,vlen)
|
||||
HELPER_REGISTER_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
|
||||
HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
|
||||
|
||||
#undef HELPER_REGISTER_CAST_VP
|
||||
// llvm.vp.fptrunc(x,mask,vlen)
|
||||
HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FPTRUNC, FPTrunc, 1)
|
||||
|
||||
// llvm.vp.fpext(x,mask,vlen)
|
||||
HELPER_REGISTER_FP_CAST_VP(fpext, VP_FPEXT, FPExt, 0)
|
||||
|
||||
#undef HELPER_REGISTER_FP_CAST_VP
|
||||
|
||||
// Specialized helper macro for integer and pointer type conversions.
|
||||
// <operation>(%x, %mask, %evl).
|
||||
#ifdef HELPER_REGISTER_INT_CAST_VP
|
||||
#error \
|
||||
"The internal helper macro HELPER_REGISTER_INT_CAST_VP is already defined!"
|
||||
#endif
|
||||
#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC) \
|
||||
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
|
||||
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
|
||||
VP_PROPERTY_CASTOP \
|
||||
END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
|
||||
|
||||
// llvm.vp.trunc(x,mask,vlen)
|
||||
HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNC, Trunc)
|
||||
|
||||
// llvm.vp.zext(x,mask,vlen)
|
||||
HELPER_REGISTER_INT_CAST_VP(zext, VP_ZEXT, ZExt)
|
||||
|
||||
// llvm.vp.sext(x,mask,vlen)
|
||||
HELPER_REGISTER_INT_CAST_VP(sext, VP_SEXT, SExt)
|
||||
|
||||
// llvm.vp.ptrtoint(x,mask,vlen)
|
||||
HELPER_REGISTER_INT_CAST_VP(ptrtoint, VP_PTRTOINT, PtrToInt)
|
||||
|
||||
// llvm.vp.inttoptr(x,mask,vlen)
|
||||
HELPER_REGISTER_INT_CAST_VP(inttoptr, VP_INTTOPTR, IntToPtr)
|
||||
|
||||
#undef HELPER_REGISTER_INT_CAST_VP
|
||||
|
||||
///// } Type Casts
|
||||
|
||||
|
|
|
@ -501,8 +501,17 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
|
|||
VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::vp_trunc:
|
||||
case Intrinsic::vp_sext:
|
||||
case Intrinsic::vp_zext:
|
||||
case Intrinsic::vp_fptoui:
|
||||
case Intrinsic::vp_fptosi:
|
||||
case Intrinsic::vp_uitofp:
|
||||
case Intrinsic::vp_sitofp:
|
||||
case Intrinsic::vp_fptrunc:
|
||||
case Intrinsic::vp_fpext:
|
||||
case Intrinsic::vp_ptrtoint:
|
||||
case Intrinsic::vp_inttoptr:
|
||||
VPFunc =
|
||||
Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()});
|
||||
break;
|
||||
|
|
|
@ -5600,6 +5600,80 @@ void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
|
|||
"VP cast intrinsic first argument and result vector lengths must be "
|
||||
"equal",
|
||||
*VPCast);
|
||||
|
||||
switch (VPCast->getIntrinsicID()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown VP cast intrinsic");
|
||||
case Intrinsic::vp_trunc:
|
||||
Assert(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
|
||||
"llvm.vp.trunc intrinsic first argument and result element type "
|
||||
"must be integer",
|
||||
*VPCast);
|
||||
Assert(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
|
||||
"llvm.vp.trunc intrinsic the bit size of first argument must be "
|
||||
"larger than the bit size of the return type",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_zext:
|
||||
case Intrinsic::vp_sext:
|
||||
Assert(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
|
||||
"llvm.vp.zext or llvm.vp.sext intrinsic first argument and result "
|
||||
"element type must be integer",
|
||||
*VPCast);
|
||||
Assert(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
|
||||
"llvm.vp.zext or llvm.vp.sext intrinsic the bit size of first "
|
||||
"argument must be smaller than the bit size of the return type",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_fptoui:
|
||||
case Intrinsic::vp_fptosi:
|
||||
Assert(
|
||||
RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
|
||||
"llvm.vp.fptoui or llvm.vp.fptosi intrinsic first argument element "
|
||||
"type must be floating-point and result element type must be integer",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_uitofp:
|
||||
case Intrinsic::vp_sitofp:
|
||||
Assert(
|
||||
RetTy->isFPOrFPVectorTy() && ValTy->isIntOrIntVectorTy(),
|
||||
"llvm.vp.uitofp or llvm.vp.sitofp intrinsic first argument element "
|
||||
"type must be integer and result element type must be floating-point",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_fptrunc:
|
||||
Assert(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
|
||||
"llvm.vp.fptrunc intrinsic first argument and result element type "
|
||||
"must be floating-point",
|
||||
*VPCast);
|
||||
Assert(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
|
||||
"llvm.vp.fptrunc intrinsic the bit size of first argument must be "
|
||||
"larger than the bit size of the return type",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_fpext:
|
||||
Assert(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
|
||||
"llvm.vp.fpext intrinsic first argument and result element type "
|
||||
"must be floating-point",
|
||||
*VPCast);
|
||||
Assert(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
|
||||
"llvm.vp.fpext intrinsic the bit size of first argument must be "
|
||||
"smaller than the bit size of the return type",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_ptrtoint:
|
||||
Assert(RetTy->isIntOrIntVectorTy() && ValTy->isPtrOrPtrVectorTy(),
|
||||
"llvm.vp.ptrtoint intrinsic first argument element type must be "
|
||||
"pointer and result element type must be integer",
|
||||
*VPCast);
|
||||
break;
|
||||
case Intrinsic::vp_inttoptr:
|
||||
Assert(RetTy->isPtrOrPtrVectorTy() && ValTy->isIntOrIntVectorTy(),
|
||||
"llvm.vp.inttoptr intrinsic first argument element type must be "
|
||||
"integer and result element type must be pointer",
|
||||
*VPCast);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
|
||||
auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
|
||||
|
|
|
@ -57,9 +57,18 @@ define void @test_vp_splice1(<vscale x 8 x i32> %i0, <vscale x 8 x i32> %i1, <vs
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @test_vp_int_fp_conversions(<8 x i32> %i0, <8 x float> %f0, <8 x i1> %mask, i32 %evl) {
|
||||
%r0 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
define void @test_vp_conversions(<8 x i32*> %p0, <8 x i32> %i0, <8 x i64> %i1, <8 x float> %f0, <8 x double> %f1, <8 x i1> %mask, i32 %evl) {
|
||||
%r0 = call <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
|
||||
%r1 = call <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
|
||||
%r2 = call <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
%r3 = call <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
%r4 = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %f1, <8 x i1> %mask, i32 %evl)
|
||||
%r5 = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %f0, <8 x i1> %mask, i32 %evl)
|
||||
%r6 = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %i1, <8 x i1> %mask, i32 %evl)
|
||||
%r7 = call <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
%r8 = call <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
%r9 = call <8 x i32> @llvm.vp.ptrtoint.v8i32.v8p0i32(<8 x i32*> %p0, <8 x i1> %mask, i32 %evl)
|
||||
%r10 = call <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i32(<8 x i32> %i0, <8 x i1> %mask, i32 %evl)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -105,8 +114,17 @@ declare float @llvm.vp.reduce.fmax.v8f32(float, <8 x float>, <8 x i1>, i32)
|
|||
declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
|
||||
declare float @llvm.vp.reduce.fmul.v8f32(float, <8 x float>, <8 x i1>, i32)
|
||||
; casts
|
||||
declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.fptoui.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.fptosi.v8i32.v8f32(<8 x float>, <8 x i1>, i32)
|
||||
declare <8 x float> @llvm.vp.uitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x float> @llvm.vp.sitofp.v8f32.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double>, <8 x i1>, i32)
|
||||
declare <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64>, <8 x i1>, i32)
|
||||
declare <8 x i64> @llvm.vp.zext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i64> @llvm.vp.sext.v8i64.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.ptrtoint.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32)
|
||||
declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32.v8i32(<8 x i32>, <8 x i1>, i32)
|
||||
; compares
|
||||
declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
|
||||
declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
|
||||
|
|
|
@ -92,10 +92,28 @@ protected:
|
|||
Str << " declare <8 x i32> @llvm.experimental.vp.splice.v8i32(<8 x "
|
||||
"i32>, <8 x i32>, i32, <8 x i1>, i32, i32) ";
|
||||
|
||||
Str << " declare <8 x i32> @llvm.vp.fptoui.v8i32"
|
||||
<< ".v8f32(<8 x float>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i32> @llvm.vp.fptosi.v8i32"
|
||||
<< ".v8f32(<8 x float>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x float> @llvm.vp.uitofp.v8f32"
|
||||
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x float> @llvm.vp.sitofp.v8f32"
|
||||
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x float> @llvm.vp.fptrunc.v8f32"
|
||||
<< ".v8f64(<8 x double>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x double> @llvm.vp.fpext.v8f64"
|
||||
<< ".v8f32(<8 x float>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i32> @llvm.vp.trunc.v8i32"
|
||||
<< ".v8i64(<8 x i64>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i64> @llvm.vp.zext.v8i64"
|
||||
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i64> @llvm.vp.sext.v8i64"
|
||||
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i32> @llvm.vp.ptrtoint.v8i32"
|
||||
<< ".v8p0i32(<8 x i32*>, <8 x i1>, i32) ";
|
||||
Str << " declare <8 x i32*> @llvm.vp.inttoptr.v8p0i32"
|
||||
<< ".v8i32(<8 x i32>, <8 x i1>, i32) ";
|
||||
|
||||
Str << " declare <8 x i1> @llvm.vp.fcmp.v8f32"
|
||||
<< "(<8 x float>, <8 x float>, metadata, <8 x i1>, i32) ";
|
||||
|
|
Loading…
Reference in New Issue