Merge remote-tracking branch 'net-next/master' into mac80211-next
Merge net-next so that we get the changes from net, which would otherwise conflict with the NLA_POLICY_NESTED/_ARRAY changes.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
This commit is contained in:
commit
752cfee90d
|
@ -72,6 +72,10 @@ ForEachMacros:
|
|||
- 'apei_estatus_for_each_section'
|
||||
- 'ata_for_each_dev'
|
||||
- 'ata_for_each_link'
|
||||
- '__ata_qc_for_each'
|
||||
- 'ata_qc_for_each'
|
||||
- 'ata_qc_for_each_raw'
|
||||
- 'ata_qc_for_each_with_internal'
|
||||
- 'ax25_for_each'
|
||||
- 'ax25_uid_for_each'
|
||||
- 'bio_for_each_integrity_vec'
|
||||
|
@ -85,6 +89,7 @@ ForEachMacros:
|
|||
- 'blk_queue_for_each_rl'
|
||||
- 'bond_for_each_slave'
|
||||
- 'bond_for_each_slave_rcu'
|
||||
- 'bpf_for_each_spilled_reg'
|
||||
- 'btree_for_each_safe128'
|
||||
- 'btree_for_each_safe32'
|
||||
- 'btree_for_each_safe64'
|
||||
|
@ -103,6 +108,8 @@ ForEachMacros:
|
|||
- 'drm_atomic_crtc_for_each_plane'
|
||||
- 'drm_atomic_crtc_state_for_each_plane'
|
||||
- 'drm_atomic_crtc_state_for_each_plane_state'
|
||||
- 'drm_atomic_for_each_plane_damage'
|
||||
- 'drm_connector_for_each_possible_encoder'
|
||||
- 'drm_for_each_connector_iter'
|
||||
- 'drm_for_each_crtc'
|
||||
- 'drm_for_each_encoder'
|
||||
|
@ -121,11 +128,21 @@ ForEachMacros:
|
|||
- 'for_each_bio'
|
||||
- 'for_each_board_func_rsrc'
|
||||
- 'for_each_bvec'
|
||||
- 'for_each_card_components'
|
||||
- 'for_each_card_links'
|
||||
- 'for_each_card_links_safe'
|
||||
- 'for_each_card_prelinks'
|
||||
- 'for_each_card_rtds'
|
||||
- 'for_each_card_rtds_safe'
|
||||
- 'for_each_cgroup_storage_type'
|
||||
- 'for_each_child_of_node'
|
||||
- 'for_each_clear_bit'
|
||||
- 'for_each_clear_bit_from'
|
||||
- 'for_each_cmsghdr'
|
||||
- 'for_each_compatible_node'
|
||||
- 'for_each_component_dais'
|
||||
- 'for_each_component_dais_safe'
|
||||
- 'for_each_comp_order'
|
||||
- 'for_each_console'
|
||||
- 'for_each_cpu'
|
||||
- 'for_each_cpu_and'
|
||||
|
@ -133,6 +150,10 @@ ForEachMacros:
|
|||
- 'for_each_cpu_wrap'
|
||||
- 'for_each_dev_addr'
|
||||
- 'for_each_dma_cap_mask'
|
||||
- 'for_each_dpcm_be'
|
||||
- 'for_each_dpcm_be_rollback'
|
||||
- 'for_each_dpcm_be_safe'
|
||||
- 'for_each_dpcm_fe'
|
||||
- 'for_each_drhd_unit'
|
||||
- 'for_each_dss_dev'
|
||||
- 'for_each_efi_memory_desc'
|
||||
|
@ -149,6 +170,7 @@ ForEachMacros:
|
|||
- 'for_each_iommu'
|
||||
- 'for_each_ip_tunnel_rcu'
|
||||
- 'for_each_irq_nr'
|
||||
- 'for_each_link_codecs'
|
||||
- 'for_each_lru'
|
||||
- 'for_each_matching_node'
|
||||
- 'for_each_matching_node_and_match'
|
||||
|
@ -160,6 +182,7 @@ ForEachMacros:
|
|||
- 'for_each_mem_range_rev'
|
||||
- 'for_each_migratetype_order'
|
||||
- 'for_each_msi_entry'
|
||||
- 'for_each_msi_entry_safe'
|
||||
- 'for_each_net'
|
||||
- 'for_each_netdev'
|
||||
- 'for_each_netdev_continue'
|
||||
|
@ -183,12 +206,14 @@ ForEachMacros:
|
|||
- 'for_each_node_with_property'
|
||||
- 'for_each_of_allnodes'
|
||||
- 'for_each_of_allnodes_from'
|
||||
- 'for_each_of_cpu_node'
|
||||
- 'for_each_of_pci_range'
|
||||
- 'for_each_old_connector_in_state'
|
||||
- 'for_each_old_crtc_in_state'
|
||||
- 'for_each_oldnew_connector_in_state'
|
||||
- 'for_each_oldnew_crtc_in_state'
|
||||
- 'for_each_oldnew_plane_in_state'
|
||||
- 'for_each_oldnew_plane_in_state_reverse'
|
||||
- 'for_each_oldnew_private_obj_in_state'
|
||||
- 'for_each_old_plane_in_state'
|
||||
- 'for_each_old_private_obj_in_state'
|
||||
|
@ -206,14 +231,17 @@ ForEachMacros:
|
|||
- 'for_each_process'
|
||||
- 'for_each_process_thread'
|
||||
- 'for_each_property_of_node'
|
||||
- 'for_each_registered_fb'
|
||||
- 'for_each_reserved_mem_region'
|
||||
- 'for_each_resv_unavail_range'
|
||||
- 'for_each_rtd_codec_dai'
|
||||
- 'for_each_rtd_codec_dai_rollback'
|
||||
- 'for_each_rtdcom'
|
||||
- 'for_each_rtdcom_safe'
|
||||
- 'for_each_set_bit'
|
||||
- 'for_each_set_bit_from'
|
||||
- 'for_each_sg'
|
||||
- 'for_each_sg_page'
|
||||
- 'for_each_sibling_event'
|
||||
- '__for_each_thread'
|
||||
- 'for_each_thread'
|
||||
- 'for_each_zone'
|
||||
|
@ -251,6 +279,8 @@ ForEachMacros:
|
|||
- 'hlist_nulls_for_each_entry_from'
|
||||
- 'hlist_nulls_for_each_entry_rcu'
|
||||
- 'hlist_nulls_for_each_entry_safe'
|
||||
- 'i3c_bus_for_each_i2cdev'
|
||||
- 'i3c_bus_for_each_i3cdev'
|
||||
- 'ide_host_for_each_port'
|
||||
- 'ide_port_for_each_dev'
|
||||
- 'ide_port_for_each_present_dev'
|
||||
|
@ -267,11 +297,14 @@ ForEachMacros:
|
|||
- 'kvm_for_each_memslot'
|
||||
- 'kvm_for_each_vcpu'
|
||||
- 'list_for_each'
|
||||
- 'list_for_each_codec'
|
||||
- 'list_for_each_codec_safe'
|
||||
- 'list_for_each_entry'
|
||||
- 'list_for_each_entry_continue'
|
||||
- 'list_for_each_entry_continue_rcu'
|
||||
- 'list_for_each_entry_continue_reverse'
|
||||
- 'list_for_each_entry_from'
|
||||
- 'list_for_each_entry_from_rcu'
|
||||
- 'list_for_each_entry_from_reverse'
|
||||
- 'list_for_each_entry_lockless'
|
||||
- 'list_for_each_entry_rcu'
|
||||
|
@ -291,6 +324,7 @@ ForEachMacros:
|
|||
- 'media_device_for_each_intf'
|
||||
- 'media_device_for_each_link'
|
||||
- 'media_device_for_each_pad'
|
||||
- 'nanddev_io_for_each_page'
|
||||
- 'netdev_for_each_lower_dev'
|
||||
- 'netdev_for_each_lower_private'
|
||||
- 'netdev_for_each_lower_private_rcu'
|
||||
|
@ -357,12 +391,14 @@ ForEachMacros:
|
|||
- 'sk_nulls_for_each'
|
||||
- 'sk_nulls_for_each_from'
|
||||
- 'sk_nulls_for_each_rcu'
|
||||
- 'snd_array_for_each'
|
||||
- 'snd_pcm_group_for_each_entry'
|
||||
- 'snd_soc_dapm_widget_for_each_path'
|
||||
- 'snd_soc_dapm_widget_for_each_path_safe'
|
||||
- 'snd_soc_dapm_widget_for_each_sink_path'
|
||||
- 'snd_soc_dapm_widget_for_each_source_path'
|
||||
- 'tb_property_for_each'
|
||||
- 'tcf_exts_for_each_action'
|
||||
- 'udp_portaddr_for_each_entry'
|
||||
- 'udp_portaddr_for_each_entry_rcu'
|
||||
- 'usb_hub_for_each_child'
|
||||
|
@ -371,6 +407,11 @@ ForEachMacros:
|
|||
- 'v4l2_m2m_for_each_dst_buf_safe'
|
||||
- 'v4l2_m2m_for_each_src_buf'
|
||||
- 'v4l2_m2m_for_each_src_buf_safe'
|
||||
- 'virtio_device_for_each_vq'
|
||||
- 'xa_for_each'
|
||||
- 'xas_for_each'
|
||||
- 'xas_for_each_conflict'
|
||||
- 'xas_for_each_marked'
|
||||
- 'zorro_for_each_dev'
|
||||
|
||||
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
|
||||
|
|
|
@ -0,0 +1,870 @@
|
|||
=====================
|
||||
BPF Type Format (BTF)
|
||||
=====================
|
||||
|
||||
1. Introduction
|
||||
***************
|
||||
|
||||
BTF (BPF Type Format) is the metadata format which
|
||||
encodes the debug info related to BPF program/map.
|
||||
The name BTF was used initially to describe
|
||||
data types. The BTF was later extended to include
|
||||
function info for defined subroutines, and line info
|
||||
for source/line information.
|
||||
|
||||
The debug info is used for map pretty print, function
|
||||
signature, etc. The function signature enables better
|
||||
bpf program/function kernel symbol names.
|
||||
The line info helps generate
|
||||
source-annotated translated byte code, jited code
|
||||
and verifier log.
|
||||
|
||||
The BTF specification contains two parts,
|
||||
* BTF kernel API
|
||||
* BTF ELF file format
|
||||
|
||||
The kernel API is the contract between
|
||||
user space and kernel. The kernel verifies
|
||||
the BTF info before using it.
|
||||
The ELF file format is a user space contract
|
||||
between the ELF file and the libbpf loader.
|
||||
|
||||
The type and string sections are part of the
|
||||
BTF kernel API, describing the debug info
|
||||
(mostly types related) referenced by the bpf program.
|
||||
These two sections are discussed in
|
||||
detail in :ref:`BTF_Type_String`.
|
||||
|
||||
.. _BTF_Type_String:
|
||||
|
||||
2. BTF Type and String Encoding
|
||||
*******************************
|
||||
|
||||
The file ``include/uapi/linux/btf.h`` provides high
|
||||
level definitions of how types/strings are encoded.
|
||||
|
||||
The beginning of the data blob must be::
|
||||
|
||||
struct btf_header {
|
||||
__u16 magic;
|
||||
__u8 version;
|
||||
__u8 flags;
|
||||
__u32 hdr_len;
|
||||
|
||||
/* All offsets are in bytes relative to the end of this header */
|
||||
__u32 type_off; /* offset of type section */
|
||||
__u32 type_len; /* length of type section */
|
||||
__u32 str_off; /* offset of string section */
|
||||
__u32 str_len; /* length of string section */
|
||||
};
|
||||
|
||||
The magic is ``0xeB9F``, which has different encodings for big and little
|
||||
endian systems, and can be used to test whether BTF is generated for a
|
||||
big or little endian target.
|
||||
The btf_header is designed to be extensible with hdr_len equal to
|
||||
``sizeof(struct btf_header)`` when the data blob is generated.
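As a rough illustration, a consumer of a raw BTF blob might sanity-check the
header along the following lines (a minimal sketch, not taken from the kernel
sources; the byte-swapped-magic remark is an assumption of this example)::

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct btf_header_view {
            uint16_t magic;
            uint8_t  version;
            uint8_t  flags;
            uint32_t hdr_len;
            uint32_t type_off, type_len;
            uint32_t str_off, str_len;
    };

    static bool btf_blob_looks_valid(const void *blob, size_t size)
    {
            struct btf_header_view h;

            if (size < sizeof(h))
                    return false;
            memcpy(&h, blob, sizeof(h));
            /* 0x9FeB here would indicate a blob of the opposite endianness */
            if (h.magic != 0xeB9F)
                    return false;
            /* hdr_len may exceed sizeof(h) if the header was later extended */
            return h.hdr_len >= sizeof(h) && h.hdr_len <= size;
    }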
|
||||
|
||||
2.1 String Encoding
|
||||
===================
|
||||
|
||||
The first string in the string section must be a null string.
|
||||
The rest of the string table is a concatenation of other null-terminated
|
||||
strings.
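For example (a purely illustrative layout, not produced by any particular
tool), a string section holding the names ``int`` and ``value`` could be laid
out as follows, with ``name_off`` values 1 and 5 referring to them::

    /* offset:  0    1    2    3    4    5    6    7    8    9    10
     * byte:   '\0'  'i'  'n'  't' '\0'  'v'  'a'  'l'  'u'  'e' '\0'
     */
    static const char strtab[] = "\0int\0value";

    /* name_off == 1 resolves to "int", name_off == 5 resolves to "value" */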
|
||||
|
||||
2.2 Type Encoding
|
||||
=================
|
||||
|
||||
The type id ``0`` is reserved for ``void`` type.
|
||||
The type section is parsed sequentially and the type id is assigned to
|
||||
each recognized type starting from id ``1``.
|
||||
Currently, the following types are supported::
|
||||
|
||||
#define BTF_KIND_INT 1 /* Integer */
|
||||
#define BTF_KIND_PTR 2 /* Pointer */
|
||||
#define BTF_KIND_ARRAY 3 /* Array */
|
||||
#define BTF_KIND_STRUCT 4 /* Struct */
|
||||
#define BTF_KIND_UNION 5 /* Union */
|
||||
#define BTF_KIND_ENUM 6 /* Enumeration */
|
||||
#define BTF_KIND_FWD 7 /* Forward */
|
||||
#define BTF_KIND_TYPEDEF 8 /* Typedef */
|
||||
#define BTF_KIND_VOLATILE 9 /* Volatile */
|
||||
#define BTF_KIND_CONST 10 /* Const */
|
||||
#define BTF_KIND_RESTRICT 11 /* Restrict */
|
||||
#define BTF_KIND_FUNC 12 /* Function */
|
||||
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
|
||||
|
||||
Note that the type section encodes debug info, not just pure types.
|
||||
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
|
||||
|
||||
Each type contains the following common data::
|
||||
|
||||
struct btf_type {
|
||||
__u32 name_off;
|
||||
/* "info" bits arrangement
|
||||
* bits 0-15: vlen (e.g. # of struct's members)
|
||||
* bits 16-23: unused
|
||||
* bits 24-27: kind (e.g. int, ptr, array...etc)
|
||||
* bits 28-30: unused
|
||||
* bit 31: kind_flag, currently used by
|
||||
* struct, union and fwd
|
||||
*/
|
||||
__u32 info;
|
||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
||||
* "size" tells the size of the type it is describing.
|
||||
*
|
||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
||||
* FUNC and FUNC_PROTO.
|
||||
* "type" is a type_id referring to another type.
|
||||
*/
|
||||
union {
|
||||
__u32 size;
|
||||
__u32 type;
|
||||
};
|
||||
};
|
||||
|
||||
For certain kinds, the common data are followed by kind specific data.
|
||||
The ``name_off`` in ``struct btf_type`` specifies the offset in the string table.
|
||||
The following sections detail the encoding of each kind.
|
||||
|
||||
2.2.1 BTF_KIND_INT
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: any valid offset
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_INT
|
||||
* ``info.vlen``: 0
|
||||
* ``size``: the size of the int type in bytes.
|
||||
|
||||
``btf_type`` is followed by a ``u32`` with following bits arrangement::
|
||||
|
||||
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
|
||||
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
|
||||
#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
|
||||
|
||||
The ``BTF_INT_ENCODING`` has the following attributes::
|
||||
|
||||
#define BTF_INT_SIGNED (1 << 0)
|
||||
#define BTF_INT_CHAR (1 << 1)
|
||||
#define BTF_INT_BOOL (1 << 2)
|
||||
|
||||
The ``BTF_INT_ENCODING()`` provides extra information: signedness,
|
||||
char, or bool, for the int type. The char and bool encoding
|
||||
are mostly useful for pretty print. At most one encoding can
|
||||
be specified for the int type.
|
||||
|
||||
The ``BTF_INT_BITS()`` specifies the number of actual bits held by
|
||||
this int type. For example, a 4-bit bitfield encodes
|
||||
``BTF_INT_BITS()`` equal to 4. The ``btf_type.size * 8``
|
||||
must be equal to or greater than ``BTF_INT_BITS()`` for the type.
|
||||
The maximum value of ``BTF_INT_BITS()`` is 128.
|
||||
|
||||
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to
|
||||
calculate values for this int. For example, a bitfield struct
|
||||
member has
|
||||
|
||||
* btf member bit offset 100 from the start of the structure,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
|
||||
|
||||
Then in the struct memory layout, this member will occupy
|
||||
``4`` bits starting from bits ``100 + 2 = 102``.
|
||||
|
||||
Alternatively, the bitfield struct member can be the following to
|
||||
access the same bits as the above:
|
||||
|
||||
* btf member bit offset 102,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
|
||||
|
||||
The original intention of ``BTF_INT_OFFSET()`` is to provide
|
||||
flexibility of bitfield encoding.
|
||||
Currently, both llvm and pahole generate ``BTF_INT_OFFSET() = 0``
|
||||
for all int types.
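As a worked example (a hand-constructed value, not tool output), a plain
4-byte signed ``int`` uses the ``BTF_INT_SIGNED`` encoding, a zero offset and
32 bits, so the ``u32`` following ``btf_type`` would be::

    __u32 int_data = (BTF_INT_SIGNED << 24) | (0 << 16) | 32;  /* 0x01000020 */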
|
||||
|
||||
2.2.2 BTF_KIND_PTR
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_PTR
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the pointee type of the pointer
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.3 BTF_KIND_ARRAY
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_ARRAY
|
||||
* ``info.vlen``: 0
|
||||
* ``size/type``: 0, not used
|
||||
|
||||
``btf_type`` is followed by one ``struct btf_array``::
|
||||
|
||||
struct btf_array {
|
||||
__u32 type;
|
||||
__u32 index_type;
|
||||
__u32 nelems;
|
||||
};
|
||||
|
||||
The ``struct btf_array`` encoding:
|
||||
* ``type``: the element type
|
||||
* ``index_type``: the index type
|
||||
* ``nelems``: the number of elements for this array (``0`` is also allowed).
|
||||
|
||||
The ``index_type`` can be any regular int type
|
||||
(u8, u16, u32, u64, unsigned __int128).
|
||||
The original design of including ``index_type`` follows DWARF,
|
||||
which has an ``index_type`` for its array type.
|
||||
Currently in BTF, beyond type verification, the ``index_type`` is not used.
|
||||
|
||||
The ``struct btf_array`` allows chaining through element type to represent
|
||||
multi-dimensional arrays. For example, for ``int a[5][6]``, the following
|
||||
type system illustrates the chaining:
|
||||
|
||||
* [1]: int
|
||||
* [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6``
|
||||
* [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5``
|
||||
|
||||
Currently, both pahole and llvm collapse a multi-dimensional array
|
||||
into a one-dimensional array, e.g., for ``a[5][6]``, ``btf_array.nelems``
|
||||
equals ``30``. This is because the original use case is map pretty
|
||||
print, where the whole array is dumped out, so a one-dimensional array
|
||||
is enough. As more BTF usage is explored, pahole and llvm can be
|
||||
changed to generate a proper chained representation for
|
||||
multi-dimensional arrays.
|
||||
|
||||
2.2.4 BTF_KIND_STRUCT
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
2.2.5 BTF_KIND_UNION
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0 or offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0 or 1
|
||||
* ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION
|
||||
* ``info.vlen``: the number of struct/union members
|
||||
* ``size``: the size of the struct/union in bytes
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``.::
|
||||
|
||||
struct btf_member {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
__u32 offset;
|
||||
};
|
||||
|
||||
``struct btf_member`` encoding:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``type``: the member type
|
||||
* ``offset``: <see below>
|
||||
|
||||
If the type info ``kind_flag`` is not set, the offset contains
|
||||
only the bit offset of the member. Note that the base type of the
|
||||
bitfield can only be int or enum type. If the bitfield size
|
||||
is 32, the base type can be either int or enum type.
|
||||
If the bitfield size is not 32, the base type must be int,
|
||||
and int type ``BTF_INT_BITS()`` encodes the bitfield size.
|
||||
|
||||
If the ``kind_flag`` is set, the ``btf_member.offset``
|
||||
contains both member bitfield size and bit offset. The
|
||||
bitfield size and bit offset are calculated as below.::
|
||||
|
||||
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
|
||||
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
|
||||
|
||||
In this case, if the base type is an int type, it must
|
||||
be a regular int type:
|
||||
|
||||
* ``BTF_INT_OFFSET()`` must be 0.
|
||||
* ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``.
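As a concrete example of the ``kind_flag`` encoding above (a constructed
value, not taken from compiler output), a 4-bit bitfield member that starts
at bit offset 2 within its struct would be encoded as::

    member->offset = (4 << 24) | 2;
    /* BTF_MEMBER_BITFIELD_SIZE(member->offset) == 4
     * BTF_MEMBER_BIT_OFFSET(member->offset)    == 2
     */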
|
||||
|
||||
The following kernel patch introduced ``kind_flag`` and
|
||||
explained why both modes exist:
|
||||
|
||||
https://github.com/torvalds/linux/commit/9d5f9f701b1891466fb3dbb1806ad97716f95cc3#diff-fa650a64fdd3968396883d2fe8215ff3
|
||||
|
||||
2.2.6 BTF_KIND_ENUM
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0 or offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_ENUM
|
||||
* ``info.vlen``: number of enum values
|
||||
* ``size``: 4
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
|
||||
|
||||
struct btf_enum {
|
||||
__u32 name_off;
|
||||
__s32 val;
|
||||
};
|
||||
|
||||
The ``btf_enum`` encoding:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``val``: any value
|
||||
|
||||
2.2.7 BTF_KIND_FWD
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0 for struct, 1 for union
|
||||
* ``info.kind``: BTF_KIND_FWD
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: 0
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.8 BTF_KIND_TYPEDEF
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_TYPEDEF
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type which can be referred by name at ``name_off``
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.9 BTF_KIND_VOLATILE
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_VOLATILE
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``volatile`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.10 BTF_KIND_CONST
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_CONST
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``const`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.11 BTF_KIND_RESTRICT
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_RESTRICT
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``restrict`` qualifier
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
2.2.12 BTF_KIND_FUNC
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: offset to a valid C identifier
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: a BTF_KIND_FUNC_PROTO type
|
||||
|
||||
No additional type data follow ``btf_type``.
|
||||
|
||||
A BTF_KIND_FUNC defines not a type, but a subprogram (function) whose
|
||||
signature is defined by ``type``. The subprogram is thus an instance of
|
||||
that type. The BTF_KIND_FUNC may in turn be referenced by a func_info in
|
||||
the :ref:`BTF_Ext_Section` (ELF) or in the arguments to
|
||||
:ref:`BPF_Prog_Load` (ABI).
|
||||
|
||||
2.2.13 BTF_KIND_FUNC_PROTO
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
``struct btf_type`` encoding requirement:
|
||||
* ``name_off``: 0
|
||||
* ``info.kind_flag``: 0
|
||||
* ``info.kind``: BTF_KIND_FUNC_PROTO
|
||||
* ``info.vlen``: # of parameters
|
||||
* ``type``: the return type
|
||||
|
||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``.::
|
||||
|
||||
struct btf_param {
|
||||
__u32 name_off;
|
||||
__u32 type;
|
||||
};
|
||||
|
||||
If a BTF_KIND_FUNC_PROTO type is referred to by a BTF_KIND_FUNC type,
|
||||
then ``btf_param.name_off`` must point to a valid C identifier
|
||||
except for the possible last argument representing the variable
|
||||
argument. The ``btf_param.type`` refers to the parameter type.
|
||||
|
||||
If the function has variable arguments, the last parameter
|
||||
is encoded with ``name_off = 0`` and ``type = 0``.
|
||||
|
||||
3. BTF Kernel API
|
||||
*****************
|
||||
|
||||
The following bpf syscall commands involve BTF:
|
||||
* BPF_BTF_LOAD: load a blob of BTF data into the kernel
|
||||
* BPF_MAP_CREATE: map creation with btf key and value type info.
|
||||
* BPF_PROG_LOAD: prog load with btf function and line info.
|
||||
* BPF_BTF_GET_FD_BY_ID: get a btf fd
|
||||
* BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info
|
||||
and other btf related info are returned.
|
||||
|
||||
The workflow typically looks like:
|
||||
::
|
||||
|
||||
Application:
|
||||
BPF_BTF_LOAD
|
||||
|
|
||||
v
|
||||
BPF_MAP_CREATE and BPF_PROG_LOAD
|
||||
|
|
||||
V
|
||||
......
|
||||
|
||||
Introspection tool:
|
||||
......
|
||||
BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's)
|
||||
|
|
||||
V
|
||||
BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd)
|
||||
|
|
||||
V
|
||||
BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id)
|
||||
| |
|
||||
V |
|
||||
BPF_BTF_GET_FD_BY_ID (get btf_fd) |
|
||||
| |
|
||||
V |
|
||||
BPF_OBJ_GET_INFO_BY_FD (get btf) |
|
||||
| |
|
||||
V V
|
||||
pretty print types, dump func signatures and line info, etc.
|
||||
|
||||
|
||||
3.1 BPF_BTF_LOAD
|
||||
================
|
||||
|
||||
Load a blob of BTF data into the kernel. A blob of data
|
||||
described in :ref:`BTF_Type_String`
|
||||
can be directly loaded into the kernel.
|
||||
A ``btf_fd`` is returned to user space.
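A minimal user space sketch of this step might look like the following
(error handling omitted; it assumes ``union bpf_attr`` from ``linux/bpf.h``
and a raw syscall wrapper, since there is no libc wrapper for ``bpf(2)``)::

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int load_btf(const void *btf_data, __u32 btf_size)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.btf = (__u64)(unsigned long)btf_data;
            attr.btf_size = btf_size;

            /* returns a btf_fd on success, or a negative error */
            return syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
    }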
|
||||
|
||||
3.2 BPF_MAP_CREATE
|
||||
==================
|
||||
|
||||
A map can be created with ``btf_fd`` and specified key/value type id.::
|
||||
|
||||
__u32 btf_fd; /* fd pointing to a BTF type data */
|
||||
__u32 btf_key_type_id; /* BTF type_id of the key */
|
||||
__u32 btf_value_type_id; /* BTF type_id of the value */
|
||||
|
||||
In libbpf, the map can be defined with extra annotation like below:
|
||||
::
|
||||
|
||||
struct bpf_map_def SEC("maps") btf_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(struct ipv_counts),
|
||||
.max_entries = 4,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
|
||||
|
||||
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name,
|
||||
key and value types for the map.
|
||||
During ELF parsing, libbpf is able to extract key/value type_id's
|
||||
and assign them to BPF_MAP_CREATE attributes automatically.
|
||||
|
||||
.. _BPF_Prog_Load:
|
||||
|
||||
3.3 BPF_PROG_LOAD
|
||||
=================
|
||||
|
||||
During prog_load, func_info and line_info can be passed to kernel with
|
||||
proper values for the following attributes:
|
||||
::
|
||||
|
||||
__u32 insn_cnt;
|
||||
__aligned_u64 insns;
|
||||
......
|
||||
__u32 prog_btf_fd; /* fd pointing to BTF type data */
|
||||
__u32 func_info_rec_size; /* userspace bpf_func_info size */
|
||||
__aligned_u64 func_info; /* func info */
|
||||
__u32 func_info_cnt; /* number of bpf_func_info records */
|
||||
__u32 line_info_rec_size; /* userspace bpf_line_info size */
|
||||
__aligned_u64 line_info; /* line info */
|
||||
__u32 line_info_cnt; /* number of bpf_line_info records */
|
||||
|
||||
The func_info and line_info are arrays of the records below, respectively.::
|
||||
|
||||
struct bpf_func_info {
|
||||
__u32 insn_off; /* [0, insn_cnt - 1] */
|
||||
__u32 type_id; /* pointing to a BTF_KIND_FUNC type */
|
||||
};
|
||||
struct bpf_line_info {
|
||||
__u32 insn_off; /* [0, insn_cnt - 1] */
|
||||
__u32 file_name_off; /* offset to string table for the filename */
|
||||
__u32 line_off; /* offset to string table for the source line */
|
||||
__u32 line_col; /* line number and column number */
|
||||
};
|
||||
|
||||
func_info_rec_size is the size of each func_info record, and line_info_rec_size
|
||||
is the size of each line_info record. Passing the record size to the kernel makes
|
||||
it possible to extend the record itself in the future.
|
||||
|
||||
Below are requirements for func_info:
|
||||
* func_info[0].insn_off must be 0.
|
||||
* the func_info insn_off is in strictly increasing order and matches
|
||||
bpf func boundaries.
|
||||
|
||||
Below are requirements for line_info:
|
||||
* the first insn in each func must point to a line_info record.
|
||||
* the line_info insn_off is in strictly increasing order.
|
||||
|
||||
For line_info, the line number and column number are defined as below:
|
||||
::
|
||||
|
||||
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
|
||||
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
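For example, a ``line_col`` value of ``7182`` (i.e. ``(7 << 10) | 14``)
decodes to line 7, column 14::

    BPF_LINE_INFO_LINE_NUM(7182) == 7
    BPF_LINE_INFO_LINE_COL(7182) == 14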
|
||||
|
||||
3.4 BPF_{PROG,MAP}_GET_NEXT_ID
==============================
|
||||
|
||||
In the kernel, every loaded program, map or btf has a unique id.
|
||||
The id won't change during the lifetime of the program, map or btf.
|
||||
|
||||
The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID
|
||||
returns all id's, one for each command, to user space, for bpf
|
||||
programs or maps respectively,
|
||||
so the introspection tool can inspect all programs and maps.
|
||||
|
||||
3.5 BPF_{PROG,MAP}_GET_FD_BY_ID
===============================
|
||||
|
||||
The introspection tool cannot use the id to get details about programs or maps.
|
||||
A file descriptor needs to be obtained first, for reference counting purposes.
|
||||
|
||||
3.6 BPF_OBJ_GET_INFO_BY_FD
|
||||
==========================
|
||||
|
||||
Once a program/map fd is acquired, the introspection tool can
|
||||
get the detailed information from kernel about this fd,
|
||||
some of which is btf related. For example,
|
||||
``bpf_map_info`` returns ``btf_id``, key/value type id.
|
||||
``bpf_prog_info`` returns ``btf_id``, func_info and line info
|
||||
for translated bpf byte codes, and jited_line_info.
|
||||
|
||||
3.7 BPF_BTF_GET_FD_BY_ID
|
||||
========================
|
||||
|
||||
With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``,
|
||||
bpf syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd.
|
||||
Then, with command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally
|
||||
loaded into the kernel with BPF_BTF_LOAD, can be retrieved.
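A rough sketch of these two steps, using ``struct bpf_btf_info`` from
``linux/bpf.h`` (same raw ``bpf(2)`` wrapper as in 3.1; error handling and
the allocation of ``buf`` are omitted)::

    struct bpf_btf_info info = {};
    union bpf_attr attr = {};
    int btf_fd;

    attr.btf_id = btf_id;            /* from bpf_map_info or bpf_prog_info */
    btf_fd = syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));

    info.btf = (__u64)(unsigned long)buf;   /* user buffer for the raw blob */
    info.btf_size = buf_size;

    memset(&attr, 0, sizeof(attr));
    attr.info.bpf_fd = btf_fd;
    attr.info.info = (__u64)(unsigned long)&info;
    attr.info.info_len = sizeof(info);
    syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));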
|
||||
|
||||
With the btf blob, ``bpf_map_info`` and ``bpf_prog_info``, the introspection
|
||||
tool has full btf knowledge and is able to pretty print map key/values,
|
||||
dump func signatures, and dump line info along with byte/jited code.
|
||||
|
||||
4. ELF File Format Interface
|
||||
****************************
|
||||
|
||||
4.1 .BTF section
|
||||
================
|
||||
|
||||
The .BTF section contains type and string data. The format of this section
|
||||
is the same as the one described in :ref:`BTF_Type_String`.
|
||||
|
||||
.. _BTF_Ext_Section:
|
||||
|
||||
4.2 .BTF.ext section
|
||||
====================
|
||||
|
||||
The .BTF.ext section encodes func_info and line_info which
|
||||
need loader manipulation before being loaded into the kernel.
|
||||
|
||||
The specification for the .BTF.ext section is defined in
|
||||
``tools/lib/bpf/btf.h`` and ``tools/lib/bpf/btf.c``.
|
||||
|
||||
The current header of .BTF.ext section::
|
||||
|
||||
struct btf_ext_header {
|
||||
__u16 magic;
|
||||
__u8 version;
|
||||
__u8 flags;
|
||||
__u32 hdr_len;
|
||||
|
||||
/* All offsets are in bytes relative to the end of this header */
|
||||
__u32 func_info_off;
|
||||
__u32 func_info_len;
|
||||
__u32 line_info_off;
|
||||
__u32 line_info_len;
|
||||
};
|
||||
|
||||
It is very similar to the .BTF section. Instead of type/string sections,
|
||||
it contains func_info and line_info sections. See :ref:`BPF_Prog_Load`
|
||||
for details about func_info and line_info record format.
|
||||
|
||||
The func_info is organized as below.::
|
||||
|
||||
func_info_rec_size
|
||||
btf_ext_info_sec for section #1 /* func_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* func_info for section #2 */
|
||||
...
|
||||
|
||||
``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure
|
||||
when .BTF.ext is generated. btf_ext_info_sec, defined below, is
|
||||
the func_info for each specific ELF section.::
|
||||
|
||||
struct btf_ext_info_sec {
|
||||
__u32 sec_name_off; /* offset to section name */
|
||||
__u32 num_info;
|
||||
/* Followed by num_info * record_size number of bytes */
|
||||
__u8 data[0];
|
||||
};
|
||||
|
||||
Here, num_info must be greater than 0.
|
||||
|
||||
The line_info is organized as below.::
|
||||
|
||||
line_info_rec_size
|
||||
btf_ext_info_sec for section #1 /* line_info for section #1 */
|
||||
btf_ext_info_sec for section #2 /* line_info for section #2 */
|
||||
...
|
||||
|
||||
``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure
|
||||
when .BTF.ext is generated.
|
||||
|
||||
The interpretation of ``bpf_func_info->insn_off`` and
|
||||
``bpf_line_info->insn_off`` is different between kernel API and ELF API.
|
||||
For the kernel API, the ``insn_off`` is the instruction offset in units
|
||||
of ``struct bpf_insn``. For the ELF API, the ``insn_off`` is the byte offset
|
||||
from the beginning of the section (``btf_ext_info_sec->sec_name_off``).
|
||||
|
||||
5. Using BTF
|
||||
************
|
||||
|
||||
5.1 bpftool map pretty print
|
||||
============================
|
||||
|
||||
With BTF, the map key/value can be printed based on fields rather than
|
||||
simply raw bytes. This is especially
|
||||
valuable for large structures or if your data structure
|
||||
has bitfields. For example, for the following map,::
|
||||
|
||||
enum A { A1, A2, A3, A4, A5 };
|
||||
typedef enum A ___A;
|
||||
struct tmp_t {
|
||||
char a1:4;
|
||||
int a2:4;
|
||||
int :4;
|
||||
__u32 a3:4;
|
||||
int b;
|
||||
___A b1:4;
|
||||
enum A b2:4;
|
||||
};
|
||||
struct bpf_map_def SEC("maps") tmpmap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct tmp_t),
|
||||
.max_entries = 1,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
|
||||
|
||||
bpftool is able to pretty print like below:
|
||||
::
|
||||
|
||||
[{
|
||||
"key": 0,
|
||||
"value": {
|
||||
"a1": 0x2,
|
||||
"a2": 0x4,
|
||||
"a3": 0x6,
|
||||
"b": 7,
|
||||
"b1": 0x8,
|
||||
"b2": 0xa
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
5.2 bpftool prog dump
|
||||
=====================
|
||||
|
||||
The following is an example showing how func_info and line_info
|
||||
can help prog dump with better kernel symbol names, function prototypes
|
||||
and line information.::
|
||||
|
||||
$ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv
|
||||
[...]
|
||||
int test_long_fname_2(struct dummy_tracepoint_args * arg):
|
||||
bpf_prog_44a040bf25481309_test_long_fname_2:
|
||||
; static int test_long_fname_2(struct dummy_tracepoint_args *arg)
|
||||
0: push %rbp
|
||||
1: mov %rsp,%rbp
|
||||
4: sub $0x30,%rsp
|
||||
b: sub $0x28,%rbp
|
||||
f: mov %rbx,0x0(%rbp)
|
||||
13: mov %r13,0x8(%rbp)
|
||||
17: mov %r14,0x10(%rbp)
|
||||
1b: mov %r15,0x18(%rbp)
|
||||
1f: xor %eax,%eax
|
||||
21: mov %rax,0x20(%rbp)
|
||||
25: xor %esi,%esi
|
||||
; int key = 0;
|
||||
27: mov %esi,-0x4(%rbp)
|
||||
; if (!arg->sock)
|
||||
2a: mov 0x8(%rdi),%rdi
|
||||
; if (!arg->sock)
|
||||
2e: cmp $0x0,%rdi
|
||||
32: je 0x0000000000000070
|
||||
34: mov %rbp,%rsi
|
||||
; counts = bpf_map_lookup_elem(&btf_map, &key);
|
||||
[...]
|
||||
|
||||
5.3 verifier log
|
||||
================
|
||||
|
||||
The following is an example of how line_info can help debug a verifier failure.::
|
||||
|
||||
/* The code at tools/testing/selftests/bpf/test_xdp_noinline.c
|
||||
* is modified as below.
|
||||
*/
|
||||
data = (void *)(long)xdp->data;
|
||||
data_end = (void *)(long)xdp->data_end;
|
||||
/*
|
||||
if (data + 4 > data_end)
|
||||
return XDP_DROP;
|
||||
*/
|
||||
*(u32 *)data = dst->dst;
|
||||
|
||||
$ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp
|
||||
; data = (void *)(long)xdp->data;
|
||||
224: (79) r2 = *(u64 *)(r10 -112)
|
||||
225: (61) r2 = *(u32 *)(r2 +0)
|
||||
; *(u32 *)data = dst->dst;
|
||||
226: (63) *(u32 *)(r2 +0) = r1
|
||||
invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0)
|
||||
R2 offset is outside of the packet
|
||||
|
||||
6. BTF Generation
|
||||
*****************
|
||||
|
||||
You need the latest pahole
|
||||
|
||||
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/
|
||||
|
||||
or llvm (8.0 or later). Pahole acts as a dwarf2btf converter. It doesn't support .BTF.ext
|
||||
and the BTF_KIND_FUNC type yet. For example,::
|
||||
|
||||
-bash-4.4$ cat t.c
|
||||
struct t {
|
||||
int a:2;
|
||||
int b:3;
|
||||
int c:2;
|
||||
} g;
|
||||
-bash-4.4$ gcc -c -O2 -g t.c
|
||||
-bash-4.4$ pahole -JV t.o
|
||||
File t.o:
|
||||
[1] STRUCT t kind_flag=1 size=4 vlen=3
|
||||
a type_id=2 bitfield_size=2 bits_offset=0
|
||||
b type_id=2 bitfield_size=3 bits_offset=2
|
||||
c type_id=2 bitfield_size=2 bits_offset=5
|
||||
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
|
||||
|
||||
llvm is able to generate .BTF and .BTF.ext directly with -g for the bpf target only.
|
||||
The assembly code (-S) is able to show the BTF encoding in assembly format.::
|
||||
|
||||
-bash-4.4$ cat t2.c
|
||||
typedef int __int32;
|
||||
struct t2 {
|
||||
int a2;
|
||||
int (*f2)(char q1, __int32 q2, ...);
|
||||
int (*f3)();
|
||||
} g2;
|
||||
int main() { return 0; }
|
||||
int test() { return 0; }
|
||||
-bash-4.4$ clang -c -g -O2 -target bpf t2.c
|
||||
-bash-4.4$ readelf -S t2.o
|
||||
......
|
||||
[ 8] .BTF PROGBITS 0000000000000000 00000247
|
||||
000000000000016e 0000000000000000 0 0 1
|
||||
[ 9] .BTF.ext PROGBITS 0000000000000000 000003b5
|
||||
0000000000000060 0000000000000000 0 0 1
|
||||
[10] .rel.BTF.ext REL 0000000000000000 000007e0
|
||||
0000000000000040 0000000000000010 16 9 8
|
||||
......
|
||||
-bash-4.4$ clang -S -g -O2 -target bpf t2.c
|
||||
-bash-4.4$ cat t2.s
|
||||
......
|
||||
.section .BTF,"",@progbits
|
||||
.short 60319 # 0xeb9f
|
||||
.byte 1
|
||||
.byte 0
|
||||
.long 24
|
||||
.long 0
|
||||
.long 220
|
||||
.long 220
|
||||
.long 122
|
||||
.long 0 # BTF_KIND_FUNC_PROTO(id = 1)
|
||||
.long 218103808 # 0xd000000
|
||||
.long 2
|
||||
.long 83 # BTF_KIND_INT(id = 2)
|
||||
.long 16777216 # 0x1000000
|
||||
.long 4
|
||||
.long 16777248 # 0x1000020
|
||||
......
|
||||
.byte 0 # string offset=0
|
||||
.ascii ".text" # string offset=1
|
||||
.byte 0
|
||||
.ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7
|
||||
.byte 0
|
||||
.ascii "int main() { return 0; }" # string offset=33
|
||||
.byte 0
|
||||
.ascii "int test() { return 0; }" # string offset=58
|
||||
.byte 0
|
||||
.ascii "int" # string offset=83
|
||||
......
|
||||
.section .BTF.ext,"",@progbits
|
||||
.short 60319 # 0xeb9f
|
||||
.byte 1
|
||||
.byte 0
|
||||
.long 24
|
||||
.long 0
|
||||
.long 28
|
||||
.long 28
|
||||
.long 44
|
||||
.long 8 # FuncInfo
|
||||
.long 1 # FuncInfo section string offset=1
|
||||
.long 2
|
||||
.long .Lfunc_begin0
|
||||
.long 3
|
||||
.long .Lfunc_begin1
|
||||
.long 5
|
||||
.long 16 # LineInfo
|
||||
.long 1 # LineInfo section string offset=1
|
||||
.long 2
|
||||
.long .Ltmp0
|
||||
.long 7
|
||||
.long 33
|
||||
.long 7182 # Line 7 Col 14
|
||||
.long .Ltmp3
|
||||
.long 7
|
||||
.long 58
|
||||
.long 8206 # Line 8 Col 14
|
||||
|
||||
7. Testing
|
||||
**********
|
||||
|
||||
The kernel bpf selftest `test_btf.c` provides an extensive set of BTF-related tests.
|
|
@ -15,6 +15,13 @@ that goes into great technical depth about the BPF Architecture.
|
|||
The primary info for the bpf syscall is available in the `man-pages`_
|
||||
for `bpf(2)`_.
|
||||
|
||||
BPF Type Format (BTF)
|
||||
=====================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
btf
|
||||
|
||||
|
||||
Frequently asked questions (FAQ)
|
||||
|
|
|
@ -108,12 +108,13 @@ some, but not all of the other indices changing.
|
|||
|
||||
Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
|
||||
will not need to allocate memory. The :c:func:`xa_reserve` function
|
||||
will store a reserved entry at the indicated index. Users of the normal
|
||||
API will see this entry as containing ``NULL``. If you do not need to
|
||||
use the reserved entry, you can call :c:func:`xa_release` to remove the
|
||||
unused entry. If another user has stored to the entry in the meantime,
|
||||
:c:func:`xa_release` will do nothing; if instead you want the entry to
|
||||
become ``NULL``, you should use :c:func:`xa_erase`.
|
||||
will store a reserved entry at the indicated index. Users of the
|
||||
normal API will see this entry as containing ``NULL``. If you do
|
||||
not need to use the reserved entry, you can call :c:func:`xa_release`
|
||||
to remove the unused entry. If another user has stored to the entry
|
||||
in the meantime, :c:func:`xa_release` will do nothing; if instead you
|
||||
want the entry to become ``NULL``, you should use :c:func:`xa_erase`.
|
||||
Using :c:func:`xa_insert` on a reserved entry will fail.
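A short sketch of this pattern (locking and error handling that a real
caller would need are omitted, and ``xa``/``index``/``entry`` are assumed to
exist in the surrounding code)::

    /* make sure a later store at 'index' cannot fail with -ENOMEM */
    if (xa_reserve(&xa, index, GFP_KERNEL))
            return -ENOMEM;

    if (entry)
            xa_store(&xa, index, entry, GFP_KERNEL);
    else
            xa_release(&xa, index);     /* nothing to store; drop the slot */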
|
||||
|
||||
If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
|
||||
will return ``true``.
|
||||
|
@ -183,6 +184,8 @@ Takes xa_lock internally:
|
|||
* :c:func:`xa_store_bh`
|
||||
* :c:func:`xa_store_irq`
|
||||
* :c:func:`xa_insert`
|
||||
* :c:func:`xa_insert_bh`
|
||||
* :c:func:`xa_insert_irq`
|
||||
* :c:func:`xa_erase`
|
||||
* :c:func:`xa_erase_bh`
|
||||
* :c:func:`xa_erase_irq`
|
||||
|
|
|
@ -235,4 +235,4 @@ cpus {
|
|||
===========================================
|
||||
|
||||
[1] ARM Linux Kernel documentation - CPUs bindings
|
||||
Documentation/devicetree/bindings/arm/cpus.txt
|
||||
Documentation/devicetree/bindings/arm/cpus.yaml
|
||||
|
|
|
@ -684,7 +684,7 @@ cpus {
|
|||
===========================================
|
||||
|
||||
[1] ARM Linux Kernel documentation - CPUs bindings
|
||||
Documentation/devicetree/bindings/arm/cpus.txt
|
||||
Documentation/devicetree/bindings/arm/cpus.yaml
|
||||
|
||||
[2] ARM Linux Kernel documentation - PSCI bindings
|
||||
Documentation/devicetree/bindings/arm/psci.txt
|
||||
|
|
|
@ -4,7 +4,7 @@ SP810 System Controller
|
|||
Required properties:
|
||||
|
||||
- compatible: standard compatible string for a Primecell peripheral,
|
||||
see Documentation/devicetree/bindings/arm/primecell.txt
|
||||
see Documentation/devicetree/bindings/arm/primecell.yaml
|
||||
for more details
|
||||
should be: "arm,sp810", "arm,primecell"
|
||||
|
||||
|
|
|
@ -472,4 +472,4 @@ cpus {
|
|||
|
||||
===============================================================================
|
||||
[1] ARM Linux kernel documentation
|
||||
Documentation/devicetree/bindings/arm/cpus.txt
|
||||
Documentation/devicetree/bindings/arm/cpus.yaml
|
||||
|
|
|
@ -18,4 +18,4 @@ Required Properties:
|
|||
Each clock is assigned an identifier and client nodes use this identifier
|
||||
to specify the clock which they consume.
|
||||
|
||||
All these identifier could be found in <dt-bindings/clock/marvell-mmp2.h>.
|
||||
All these identifiers could be found in <dt-bindings/clock/marvell,mmp2.h>.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
* ARM PrimeCell Color LCD Controller PL110/PL111
|
||||
|
||||
See also Documentation/devicetree/bindings/arm/primecell.txt
|
||||
See also Documentation/devicetree/bindings/arm/primecell.yaml
|
||||
|
||||
Required properties:
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@ Example:
|
|||
reg = <0x04300000 0x20000>;
|
||||
reg-names = "kgsl_3d0_reg_memory";
|
||||
interrupts = <GIC_SPI 80 0>;
|
||||
interrupt-names = "kgsl_3d0_irq";
|
||||
clock-names =
|
||||
"core",
|
||||
"iface",
|
||||
|
|
|
@ -14,8 +14,6 @@ Required properties:
|
|||
|
||||
"marvell,armada-8k-gpio" should be used for the Armada 7K and 8K
|
||||
SoCs (either from AP or CP), see
|
||||
Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
|
||||
and
|
||||
Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
|
||||
for specific details about the offset property.
|
||||
|
||||
|
|
|
@ -78,7 +78,7 @@ Sub-nodes:
|
|||
PPI affinity can be expressed as a single "ppi-partitions" node,
|
||||
containing a set of sub-nodes, each with the following property:
|
||||
- affinity: Should be a list of phandles to CPU nodes (as described in
|
||||
Documentation/devicetree/bindings/arm/cpus.txt).
|
||||
Documentation/devicetree/bindings/arm/cpus.yaml).
|
||||
|
||||
GICv3 has one or more Interrupt Translation Services (ITS) that are
|
||||
used to route Message Signalled Interrupts (MSI) to the CPUs.
|
||||
|
|
|
@ -3,12 +3,16 @@ Mediatek MT7530 Ethernet switch
|
|||
|
||||
Required properties:
|
||||
|
||||
- compatible: Must be compatible = "mediatek,mt7530";
|
||||
- compatible: may be compatible = "mediatek,mt7530"
|
||||
or compatible = "mediatek,mt7621"
|
||||
- #address-cells: Must be 1.
|
||||
- #size-cells: Must be 0.
|
||||
- mediatek,mcm: Boolean; if defined, indicates that the MT7530 is either the part
|
||||
on the multi-chip module belonging to the MT7623A, or the standalone
|
||||
chip as provided on the MT7623N reference board.
|
||||
|
||||
If compatible mediatek,mt7530 is set then the following properties are required
|
||||
|
||||
- core-supply: Phandle to the regulator node necessary for the core power.
|
||||
- io-supply: Phandle to the regulator node necessary for the I/O power.
|
||||
See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
Qualcomm Ethernet ETHQOS device
|
||||
|
||||
This documents the dwmac-based Ethernet device which supports Gigabit
|
||||
ethernet for version v2.3.0 onwards.
|
||||
|
||||
This device has following properties:
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible: Should be qcom,qcs404-ethqos"
|
||||
|
||||
- reg: Address and length of the register set for the device
|
||||
|
||||
- reg-names: Should contain register names "stmmaceth", "rgmii"
|
||||
|
||||
- clocks: Should contain phandle to clocks
|
||||
|
||||
- clock-names: Should contain clock names "stmmaceth", "pclk",
|
||||
"ptp_ref", "rgmii"
|
||||
|
||||
- interrupts: Should contain phandle to interrupts
|
||||
|
||||
- interrupt-names: Should contain interrupt names "macirq", "eth_lpi"
|
||||
|
||||
The rest of the properties are defined in the stmmac.txt file in the same directory
|
||||
|
||||
|
||||
Example:
|
||||
|
||||
ethernet: ethernet@7a80000 {
|
||||
compatible = "qcom,qcs404-ethqos";
|
||||
reg = <0x07a80000 0x10000>,
|
||||
<0x07a96000 0x100>;
|
||||
reg-names = "stmmaceth", "rgmii";
|
||||
clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
|
||||
clocks = <&gcc GCC_ETH_AXI_CLK>,
|
||||
<&gcc GCC_ETH_SLAVE_AHB_CLK>,
|
||||
<&gcc GCC_ETH_PTP_CLK>,
|
||||
<&gcc GCC_ETH_RGMII_CLK>;
|
||||
interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-names = "macirq", "eth_lpi";
|
||||
snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
|
||||
snps,reset-active-low;
|
||||
|
||||
snps,txpbl = <8>;
|
||||
snps,rxpbl = <2>;
|
||||
snps,aal;
|
||||
snps,tso;
|
||||
|
||||
phy-handle = <&phy1>;
|
||||
phy-mode = "rgmii";
|
||||
|
||||
mdio {
|
||||
#address-cells = <0x1>;
|
||||
#size-cells = <0x0>;
|
||||
compatible = "snps,dwmac-mdio";
|
||||
phy1: phy@4 {
|
||||
device_type = "ethernet-phy";
|
||||
reg = <0x4>;
|
||||
};
|
||||
};
|
||||
|
||||
};
|
|
@ -17,6 +17,8 @@ Clock Properties:
|
|||
- fsl,tmr-fiper1 Fixed interval period pulse generator.
|
||||
- fsl,tmr-fiper2 Fixed interval period pulse generator.
|
||||
- fsl,max-adj Maximum frequency adjustment in parts per billion.
|
||||
- fsl,extts-fifo The presence of this property indicates hardware
|
||||
support for the external trigger stamp FIFO.
|
||||
|
||||
These properties set the operational parameters for the PTP
|
||||
clock. You must choose these carefully for the clock to work right.
|
||||
|
|
|
@ -55,7 +55,7 @@ of these nodes are defined by the individual bindings for the specific function
|
|||
= EXAMPLE
|
||||
The following example represents the GLINK RPM node on a MSM8996 device, with
|
||||
the function for the "rpm_request" channel defined, which is used for
|
||||
regualtors and root clocks.
|
||||
regulators and root clocks.
|
||||
|
||||
apcs_glb: mailbox@9820000 {
|
||||
compatible = "qcom,msm8996-apcs-hmss-global";
|
||||
|
|
|
@ -41,12 +41,12 @@ processor ID) and a string identifier.
|
|||
- qcom,local-pid:
|
||||
Usage: required
|
||||
Value type: <u32>
|
||||
Definition: specifies the identfier of the local endpoint of this edge
|
||||
Definition: specifies the identifier of the local endpoint of this edge
|
||||
|
||||
- qcom,remote-pid:
|
||||
Usage: required
|
||||
Value type: <u32>
|
||||
Definition: specifies the identfier of the remote endpoint of this edge
|
||||
Definition: specifies the identifier of the remote endpoint of this edge
|
||||
|
||||
= SUBNODES
|
||||
Each SMP2P pair contain a set of inbound and outbound entries, these are
|
||||
|
|
|
@ -163,6 +163,14 @@ C. Boot options
|
|||
be preserved until there actually is some text output to the console.
|
||||
This option causes fbcon to bind immediately to the fbdev device.
|
||||
|
||||
7. fbcon=logo-pos:<location>
|
||||
|
||||
The only possible 'location' is 'center' (without quotes), and when
|
||||
given, the bootup logo is moved from the default top-left corner
|
||||
location to the center of the framebuffer. If more than one logo is
|
||||
displayed due to multiple CPUs, the collected line of logos is moved
|
||||
as a whole.
|
||||
|
||||
C. Attaching, Detaching and Unloading
|
||||
|
||||
Before going on to how to attach, detach and unload the framebuffer console, an
|
||||
|
|
|
@ -1,86 +0,0 @@
|
|||
The health mechanism is targeted for Real Time Alerting, in order to know when
|
||||
something bad has happened to a PCI device
|
||||
- Provide alert debug information
|
||||
- Self healing
|
||||
- If problem needs vendor support, provide a way to gather all needed debugging
|
||||
information.
|
||||
|
||||
The main idea is to unify and centralize driver health reports in the
|
||||
generic devlink instance and allow the user to set different
|
||||
attributes of the health reporting and recovery procedures.
|
||||
|
||||
The devlink health reporter:
|
||||
Device driver creates a "health reporter" per each error/health type.
|
||||
Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
|
||||
or unknown (driver specific).
|
||||
For each registered health reporter a driver can issue error/health reports
|
||||
asynchronously. All health reports handling is done by devlink.
|
||||
Device driver can provide specific callbacks for each "health reporter", e.g.
|
||||
- Recovery procedures
|
||||
- Diagnostics and object dump procedures
|
||||
- OOB initial parameters
|
||||
Different parts of the driver can register different types of health reporters
|
||||
with different handlers.
|
||||
|
||||
Once an error is reported, devlink health will do the following actions:
|
||||
* A log is sent to the kernel trace events buffer
|
||||
* Health status and statistics are being updated for the reporter instance
|
||||
* Object dump is being taken and saved at the reporter instance (as long as
|
||||
there is no other dump which is already stored)
|
||||
* Auto recovery attempt is being done. Depends on:
|
||||
- Auto-recovery configuration
|
||||
- Grace period vs. time passed since last recover
|
||||
|
||||
The user interface:
|
||||
User can access/change each reporter's parameters and driver specific callbacks
|
||||
via devlink, e.g per error type (per health reporter)
|
||||
- Configure reporter's generic parameters (like: disable/enable auto recovery)
|
||||
- Invoke recovery procedure
|
||||
- Run diagnostics
|
||||
- Object dump
|
||||
|
||||
The devlink health interface (via netlink):
|
||||
DEVLINK_CMD_HEALTH_REPORTER_GET
|
||||
Retrieves status and configuration info per DEV and reporter.
|
||||
DEVLINK_CMD_HEALTH_REPORTER_SET
|
||||
Allows reporter-related configuration setting.
|
||||
DEVLINK_CMD_HEALTH_REPORTER_RECOVER
|
||||
Triggers a reporter's recovery procedure.
|
||||
DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
|
||||
Retrieves diagnostics data from a reporter on a device.
|
||||
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
|
||||
Retrieves the last stored dump. Devlink health
|
||||
saves a single dump. If a dump is not already stored by devlink
|
||||
for this reporter, devlink generates a new dump.
|
||||
dump output is defined by the reporter.
|
||||
DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
|
||||
Clears the last saved dump file for the specified reporter.
|
||||
|
||||
|
||||
netlink
|
||||
+--------------------------+
|
||||
| |
|
||||
| + |
|
||||
| | |
|
||||
+--------------------------+
|
||||
|request for ops
|
||||
|(diagnose,
|
||||
mlx5_core devlink |recover,
|
||||
|dump)
|
||||
+--------+ +--------------------------+
|
||||
| | | reporter| |
|
||||
| | | +---------v----------+ |
|
||||
| | ops execution | | | |
|
||||
| <----------------------------------+ | |
|
||||
| | | | | |
|
||||
| | | + ^------------------+ |
|
||||
| | | | request for ops |
|
||||
| | | | (recover, dump) |
|
||||
| | | | |
|
||||
| | | +-+------------------+ |
|
||||
| | health report | | health handler | |
|
||||
| +-------------------------------> | |
|
||||
| | | +--------------------+ |
|
||||
| | health reporter create | |
|
||||
| +----------------------------> |
|
||||
+--------+ +--------------------------+
|
|
@ -0,0 +1,2 @@
|
|||
fw_load_policy [DEVICE, GENERIC]
|
||||
Configuration mode: driverinit
|
|
@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of:
|
|||
BPF_STX 0x03 BPF_STX 0x03
|
||||
BPF_ALU 0x04 BPF_ALU 0x04
|
||||
BPF_JMP 0x05 BPF_JMP 0x05
|
||||
BPF_RET 0x06 [ class 6 unused, for future if needed ]
|
||||
BPF_RET 0x06 BPF_JMP32 0x06
|
||||
BPF_MISC 0x07 BPF_ALU64 0x07
|
||||
|
||||
When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
|
||||
|
@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
|
|||
BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */
|
||||
BPF_END 0xd0 /* eBPF only: endianness conversion */
|
||||
|
||||
If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
|
||||
If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
|
||||
|
||||
BPF_JA 0x00
|
||||
BPF_JA 0x00 /* BPF_JMP only */
|
||||
BPF_JEQ 0x10
|
||||
BPF_JGT 0x20
|
||||
BPF_JGE 0x30
|
||||
|
@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
|
|||
BPF_JNE 0x50 /* eBPF only: jump != */
|
||||
BPF_JSGT 0x60 /* eBPF only: signed '>' */
|
||||
BPF_JSGE 0x70 /* eBPF only: signed '>=' */
|
||||
BPF_CALL 0x80 /* eBPF only: function call */
|
||||
BPF_EXIT 0x90 /* eBPF only: function return */
|
||||
BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */
|
||||
BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */
|
||||
BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
|
||||
BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
|
||||
BPF_JSLT 0xc0 /* eBPF only: signed '<' */
|
||||
|
@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
|
|||
operation. Classic BPF_RET | BPF_K means copy imm32 into return register
|
||||
and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
|
||||
in eBPF means function exit only. The eBPF program needs to store return
|
||||
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
|
||||
unused and reserved for future use.
|
||||
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
|
||||
BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
|
||||
operands for the comparisons instead.
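For example (an illustrative opcode, not taken from any particular program), a
32-bit equality test against an immediate would be encoded as:

  BPF_JMP32 | BPF_JEQ | BPF_K    compare the low 32 bits of dst_reg with imm32
  BPF_JMP   | BPF_JEQ | BPF_K    the same test on the full 64-bit registers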
|
||||
|
||||
For load and store instructions the 8-bit 'code' field is divided as:
|
||||
|
||||
|
|
|
@ -11,24 +11,25 @@ Contents:
|
|||
batman-adv
|
||||
can
|
||||
can_ucan_protocol
|
||||
dpaa2/index
|
||||
e100
|
||||
e1000
|
||||
e1000e
|
||||
fm10k
|
||||
igb
|
||||
igbvf
|
||||
ixgb
|
||||
ixgbe
|
||||
ixgbevf
|
||||
i40e
|
||||
iavf
|
||||
ice
|
||||
device_drivers/freescale/dpaa2/index
|
||||
device_drivers/intel/e100
|
||||
device_drivers/intel/e1000
|
||||
device_drivers/intel/e1000e
|
||||
device_drivers/intel/fm10k
|
||||
device_drivers/intel/igb
|
||||
device_drivers/intel/igbvf
|
||||
device_drivers/intel/ixgb
|
||||
device_drivers/intel/ixgbe
|
||||
device_drivers/intel/ixgbevf
|
||||
device_drivers/intel/i40e
|
||||
device_drivers/intel/iavf
|
||||
device_drivers/intel/ice
|
||||
kapi
|
||||
z8530book
|
||||
msg_zerocopy
|
||||
failover
|
||||
net_failover
|
||||
phy
|
||||
alias
|
||||
bridge
|
||||
snmp_counter
|
||||
|
|
|
@ -0,0 +1,447 @@
|
|||
=====================
|
||||
PHY Abstraction Layer
|
||||
=====================
|
||||
|
||||
Purpose
|
||||
=======
|
||||
|
||||
Most network devices consist of a set of registers which provide an interface
|
||||
to a MAC layer, which communicates with the physical connection through a
|
||||
PHY. The PHY concerns itself with negotiating link parameters with the link
|
||||
partner on the other side of the network connection (typically, an ethernet
|
||||
cable), and provides a register interface to allow drivers to determine what
|
||||
settings were chosen, and to configure what settings are allowed.
|
||||
|
||||
While these devices are distinct from the network devices, and conform to a
|
||||
standard layout for the registers, it has been common practice to integrate
|
||||
the PHY management code with the network driver. This has resulted in large
|
||||
amounts of redundant code. Also, on embedded systems with multiple (and
|
||||
sometimes quite different) ethernet controllers connected to the same
|
||||
management bus, it is difficult to ensure safe use of the bus.
|
||||
|
||||
Since the PHYs are devices, and the management busses through which they are
|
||||
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
|
||||
In doing so, it has these goals:
|
||||
|
||||
#. Increase code-reuse
|
||||
#. Increase overall code-maintainability
|
||||
#. Speed development time for new network drivers, and for new systems
|
||||
|
||||
Basically, this layer is meant to provide an interface to PHY devices which
|
||||
allows network driver writers to write as little code as possible, while
|
||||
still providing a full feature set.
|
||||
|
||||
The MDIO bus
|
||||
============
|
||||
|
||||
Most network devices are connected to a PHY by means of a management bus.
|
||||
Different devices use different busses (though some share common interfaces).
|
||||
In order to take advantage of the PAL, each bus interface needs to be
|
||||
registered as a distinct device.
|
||||
|
||||
#. read and write functions must be implemented. Their prototypes are::
|
||||
|
||||
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
|
||||
int read(struct mii_bus *bus, int mii_id, int regnum);
|
||||
|
||||
mii_id is the address on the bus for the PHY, and regnum is the register
|
||||
number. These functions are guaranteed not to be called from interrupt
|
||||
time, so it is safe for them to block, waiting for an interrupt to signal
|
||||
the operation is complete.
|
||||
|
||||
#. A reset function is optional. This is used to return the bus to an
|
||||
initialized state.
|
||||
|
||||
#. A probe function is needed. This function should set up anything the bus
|
||||
   driver needs, set up the mii_bus structure, and register with the PAL using
|
||||
mdiobus_register. Similarly, there's a remove function to undo all of
|
||||
that (use mdiobus_unregister).
|
||||
|
||||
#. Like any driver, the device_driver structure must be configured, and init
|
||||
   and exit functions are used to register the driver.
|
||||
|
||||
#. The bus must also be declared somewhere as a device, and registered.
|
||||
|
||||
As an example of how one driver implemented an MDIO bus driver, see
|
||||
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
|
||||
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
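
A hedged, minimal sketch of such a bus driver follows; struct my_mdio_priv
and the my_hw_read()/my_hw_write() hardware accessors are hypothetical
placeholders, only the mii_bus structure and the mdiobus_* calls are genuine
PAL interfaces::

    #include <linux/phy.h>
    #include <linux/platform_device.h>

    static int my_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
    {
            struct my_mdio_priv *priv = bus->priv;

            /* May sleep until the hardware signals completion. */
            return my_hw_read(priv, mii_id, regnum);
    }

    static int my_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
                             u16 value)
    {
            struct my_mdio_priv *priv = bus->priv;

            return my_hw_write(priv, mii_id, regnum, value);
    }

    static int my_mdio_probe(struct platform_device *pdev)
    {
            struct mii_bus *bus;

            bus = devm_mdiobus_alloc_size(&pdev->dev,
                                          sizeof(struct my_mdio_priv));
            if (!bus)
                    return -ENOMEM;

            bus->name = "my_mdio_bus";
            bus->read = my_mdio_read;
            bus->write = my_mdio_write;
            bus->parent = &pdev->dev;
            snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
            platform_set_drvdata(pdev, bus);

            /* Registers the bus and scans it for PHY devices. */
            return mdiobus_register(bus);
    }

    static int my_mdio_remove(struct platform_device *pdev)
    {
            mdiobus_unregister(platform_get_drvdata(pdev));
            return 0;
    }
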
|
||||
|
||||
(RG)MII/electrical interface considerations
|
||||
===========================================
|
||||
|
||||
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
|
||||
electrical signal interface using a synchronous 125MHz clock signal and several
|
||||
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
|
||||
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
|
||||
sink) have enough setup and hold times to sample the data lines correctly. The
|
||||
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
|
||||
the PHY driver, and optionally the MAC driver, implement the required delay. The
|
||||
values of phy_interface_t must be understood from the perspective of the PHY
|
||||
device itself, leading to the following:
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
|
||||
  internal delay by itself; it assumes that either the Ethernet MAC (if capable)
|
||||
  or the PCB traces insert the correct 1.5-2ns delay
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
|
||||
for the transmit data lines (TXD[3:0]) processed by the PHY device
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
|
||||
for the receive data lines (RXD[3:0]) processed by the PHY device
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
|
||||
both transmit AND receive data lines from/to the PHY device
|
||||
|
||||
Whenever possible, use the PHY side RGMII delay for these reasons:
|
||||
|
||||
* PHY devices may offer sub-nanosecond granularity in how they allow a
|
||||
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
|
||||
precision may be required to account for differences in PCB trace lengths
|
||||
|
||||
* PHY devices are typically qualified for a large range of applications
|
||||
(industrial, medical, automotive...), and they provide a constant and
|
||||
reliable delay across temperature/pressure/voltage ranges
|
||||
|
||||
* PHY device drivers in PHYLIB being reusable by nature, being able to
|
||||
  correctly configure a specified delay enables more designs with similar delay
|
||||
  requirements to operate correctly
|
||||
|
||||
For cases where the PHY is not capable of providing this delay, but the
|
||||
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
|
||||
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
|
||||
configured correctly in order to provide the required transmit and/or receive
|
||||
side delay from the perspective of the PHY device. Conversely, if the Ethernet
|
||||
MAC driver looks at the phy_interface_t value, for any other mode but
|
||||
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
|
||||
disabled.
|
||||
|
||||
In case neither the Ethernet MAC, nor the PHY are capable of providing the
|
||||
required delays, as defined per the RGMII standard, several options may be
|
||||
available:
|
||||
|
||||
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
|
||||
  set of pins' strength, delays, and voltage; and it may be a suitable
|
||||
option to insert the expected 2ns RGMII delay.
|
||||
|
||||
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
|
||||
designed serpentine), which may not require software configuration at all.
|
||||
|
||||
Common problems with RGMII delay mismatch
|
||||
-----------------------------------------
|
||||
|
||||
When there is an RGMII delay mismatch between the Ethernet MAC and the PHY, this
|
||||
will most likely result in the clock and data line signals being unstable when
|
||||
the PHY or MAC take a snapshot of these signals to translate them into logical
|
||||
1 or 0 states and reconstruct the data being transmitted/received. Typical
|
||||
symptoms include:
|
||||
|
||||
* Transmission/reception partially works, and there is frequent or occasional
|
||||
packet loss observed
|
||||
|
||||
* Ethernet MAC may report some or all packets ingressing with an FCS/CRC error,
|
||||
or just discard them all
|
||||
|
||||
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
|
||||
(since there is enough setup/hold time in that case)
|
||||
|
||||
Connecting to a PHY
|
||||
===================
|
||||
|
||||
Sometime during startup, the network driver needs to establish a connection
|
||||
between the PHY device, and the network device. At this time, the PHY's bus
|
||||
and drivers need to all have been loaded, so it is ready for the connection.
|
||||
At this point, there are several ways to connect to the PHY:
|
||||
|
||||
#. The PAL handles everything, and only calls the network driver when
|
||||
the link state changes, so it can react.
|
||||
|
||||
#. The PAL handles everything except interrupts (usually because the
|
||||
controller has the interrupt registers).
|
||||
|
||||
#. The PAL handles everything, but checks in with the driver every second,
|
||||
allowing the network driver to react first to any changes before the PAL
|
||||
does.
|
||||
|
||||
#. The PAL serves only as a library of functions, with the network device
|
||||
manually calling functions to update status, and configure the PHY
|
||||
|
||||
|
||||
Letting the PHY Abstraction Layer do Everything
|
||||
===============================================
|
||||
|
||||
If you choose option 1 (The hope is that every driver can, but to still be
|
||||
useful to drivers that can't), connecting to the PHY is simple:
|
||||
|
||||
First, you need a function to react to changes in the link state. This
|
||||
function follows this protocol::
|
||||
|
||||
static void adjust_link(struct net_device *dev);
|
||||
|
||||
Next, you need to know the device name of the PHY connected to this device.
|
||||
The name will look something like "0:00", where the first number is the
|
||||
bus id, and the second is the PHY's address on that bus. Typically,
|
||||
the bus is responsible for making its ID unique.
|
||||
|
||||
Now, to connect, just call this function::
|
||||
|
||||
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
|
||||
|
||||
*phydev* is a pointer to the phy_device structure which represents the PHY.
|
||||
If phy_connect is successful, it will return the pointer. dev, here, is the
|
||||
pointer to your net_device. Once done, this function will have started the
|
||||
PHY's software state machine, and registered for the PHY's interrupt, if it
|
||||
has one. The phydev structure will be populated with information about the
|
||||
current state, though the PHY will not yet be truly operational at this
|
||||
point.
|
||||
|
||||
PHY-specific flags should be set in phydev->dev_flags prior to the call
|
||||
to phy_connect() such that the underlying PHY driver can check for flags
|
||||
and perform specific operations based on them.
|
||||
This is useful if the system has put hardware restrictions on
|
||||
the PHY/controller, of which the PHY needs to be aware.
|
||||
|
||||
*interface* is a u32 which specifies the connection type used
|
||||
between the controller and the PHY. Examples are GMII, MII,
|
||||
RGMII, and SGMII. For a full list, see include/linux/phy.h
|
||||
|
||||
Now just make sure that phydev->supported and phydev->advertising have any
|
||||
values pruned from them which don't make sense for your controller (a 10/100
|
||||
controller may be connected to a gigabit capable PHY, so you would need to
|
||||
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
|
||||
for these bitfields. Note that you should not SET any bits, except the
|
||||
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
|
||||
put into an unsupported state.
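
For instance, a hypothetical 10/100-only controller attached to a gigabit
capable PHY could prune the faster modes with the phy_set_max_speed()
helper (masking the SUPPORTED_1000baseT* bits by hand achieves the same
result); this is only a sketch, not a requirement of the API::

    /* MAC limited to 100Mbit/s: drop the gigabit modes the PHY reports. */
    phy_set_max_speed(phydev, SPEED_100);
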
|
||||
|
||||
Lastly, once the controller is ready to handle network traffic, you call
|
||||
phy_start(phydev). This tells the PAL that you are ready, and configures the
|
||||
PHY to connect to the network. If the MAC interrupt of your network driver
|
||||
also handles PHY status changes, just set phydev->irq to PHY_IGNORE_INTERRUPT
|
||||
before you call phy_start and use phy_mac_interrupt() from the network
|
||||
driver. If you don't want to use interrupts, set phydev->irq to PHY_POLL.
|
||||
phy_start() enables the PHY interrupts (if applicable) and starts the
|
||||
phylib state machine.
|
||||
|
||||
When you want to disconnect from the network (even if just briefly), you call
|
||||
phy_stop(phydev). This function also stops the phylib state machine and
|
||||
disables PHY interrupts.
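
Putting these steps together, a driver's open and stop paths might look
roughly like the sketch below. struct my_priv, my_netdev_open(),
my_netdev_stop(), my_mac_update_link() and the "mdio:00" bus id are
hypothetical placeholders; only the phy_* calls are PAL interfaces::

    static void adjust_link(struct net_device *dev)
    {
            struct my_priv *priv = netdev_priv(dev);
            struct phy_device *phydev = priv->phydev;

            if (phydev->link)
                    /* Reprogram the MAC for the negotiated parameters. */
                    my_mac_update_link(priv, phydev->speed, phydev->duplex);

            phy_print_status(phydev);
    }

    static int my_netdev_open(struct net_device *dev)
    {
            struct my_priv *priv = netdev_priv(dev);
            struct phy_device *phydev;

            phydev = phy_connect(dev, "mdio:00", &adjust_link,
                                 PHY_INTERFACE_MODE_RGMII);
            if (IS_ERR(phydev))
                    return PTR_ERR(phydev);
            priv->phydev = phydev;

            /* The controller is ready for traffic: start the state machine. */
            phy_start(phydev);
            return 0;
    }

    static int my_netdev_stop(struct net_device *dev)
    {
            struct my_priv *priv = netdev_priv(dev);

            phy_stop(priv->phydev);
            phy_disconnect(priv->phydev);
            return 0;
    }
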
|
||||
|
||||
Pause frames / flow control
|
||||
===========================
|
||||
|
||||
The PHY does not participate directly in flow control/pause frames except by
|
||||
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
|
||||
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
|
||||
controller supports such a thing. Since flow control/pause frames generation
|
||||
involves the Ethernet MAC driver, it is recommended that this driver takes care
|
||||
of properly indicating advertisement and support for such features by setting
|
||||
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
|
||||
either before or after phy_connect() and/or as a result of implementing the
|
||||
ethtool::set_pauseparam feature.
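
A hedged example, assuming the MAC can both generate and honour pause frames
and that the phy_support_asym_pause() helper is available (setting the
SUPPORTED_Pause and SUPPORTED_AsymPause bits directly works as well)::

    /* After phy_connect() and before phy_start(). */
    phy_support_asym_pause(priv->phydev);
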
|
||||
|
||||
|
||||
Keeping Close Tabs on the PAL
|
||||
=============================
|
||||
|
||||
It is possible that the PAL's built-in state machine needs a little help to
|
||||
keep your network device and the PHY properly in sync. If so, you can
|
||||
register a helper function when connecting to the PHY, which will be called
|
||||
every second before the state machine reacts to any changes. To do this, you
|
||||
need to manually call phy_attach() and phy_prepare_link(), and then call
|
||||
phy_start_machine() with the second argument set to point to your special
|
||||
handler.
|
||||
|
||||
Currently there are no examples of how to use this functionality, and testing
|
||||
on it has been limited because the author does not have any drivers which use
|
||||
it (they all use option 1). So Caveat Emptor.
|
||||
|
||||
Doing it all yourself
|
||||
=====================
|
||||
|
||||
There's a remote chance that the PAL's built-in state machine cannot track
|
||||
the complex interactions between the PHY and your network device. If this is
|
||||
so, you can simply call phy_attach(), and not call phy_start_machine or
|
||||
phy_prepare_link(). This will mean that phydev->state is entirely yours to
|
||||
handle (phy_start and phy_stop toggle between some of the states, so you
|
||||
might need to avoid them).
|
||||
|
||||
An effort has been made to make sure that useful functionality can be
|
||||
accessed without the state-machine running, and most of these functions are
|
||||
descended from functions which did not interact with a complex state-machine.
|
||||
However, again, no effort has been made so far to test running without the
|
||||
state machine, so proceed with caution.
|
||||
|
||||
Here is a brief rundown of the functions::
|
||||
|
||||
int phy_read(struct phy_device *phydev, u16 regnum);
|
||||
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
|
||||
|
||||
Simple read/write primitives. They invoke the bus's read/write function
|
||||
pointers.
|
||||
::
|
||||
|
||||
void phy_print_status(struct phy_device *phydev);
|
||||
|
||||
A convenience function to print out the PHY status neatly.
|
||||
::
|
||||
|
||||
void phy_request_interrupt(struct phy_device *phydev);
|
||||
|
||||
Requests the IRQ for the PHY interrupts.
|
||||
::
|
||||
|
||||
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
|
||||
phy_interface_t interface);
|
||||
|
||||
Attaches a network device to a particular PHY, binding the PHY to a generic
|
||||
driver if none was found during bus initialization.
|
||||
::
|
||||
|
||||
int phy_start_aneg(struct phy_device *phydev);
|
||||
|
||||
Using variables inside the phydev structure, either configures advertising
|
||||
and resets autonegotiation, or disables autonegotiation, and configures
|
||||
forced settings.
|
||||
::
|
||||
|
||||
static inline int phy_read_status(struct phy_device *phydev);
|
||||
|
||||
Fills the phydev structure with up-to-date information about the current
|
||||
settings in the PHY.
|
||||
::
|
||||
|
||||
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
|
||||
|
||||
Ethtool convenience functions.
|
||||
::
|
||||
|
||||
int phy_mii_ioctl(struct phy_device *phydev,
|
||||
struct mii_ioctl_data *mii_data, int cmd);
|
||||
|
||||
The MII ioctl. Note that this function will completely screw up the state
|
||||
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
|
||||
use this only to write registers which are not standard, and don't set off
|
||||
a renegotiation.
|
||||
|
||||
PHY Device Drivers
|
||||
==================
|
||||
|
||||
With the PHY Abstraction Layer, adding support for new PHYs is
|
||||
quite easy. In some cases, no work is required at all! However,
|
||||
many PHYs require a little hand-holding to get up-and-running.
|
||||
|
||||
Generic PHY driver
|
||||
------------------
|
||||
|
||||
If the desired PHY doesn't have any errata, quirks, or special
|
||||
features you want to support, then it may be best to not add
|
||||
support, and let the PHY Abstraction Layer's Generic PHY Driver
|
||||
do all of the work.
|
||||
|
||||
Writing a PHY driver
|
||||
--------------------
|
||||
|
||||
If you do need to write a PHY driver, the first thing to do is
|
||||
make sure it can be matched with an appropriate PHY device.
|
||||
This is done during bus initialization by reading the device's
|
||||
UID (stored in registers 2 and 3), then comparing it to each
|
||||
driver's phy_id field by ANDing it with each driver's
|
||||
phy_id_mask field. Also, it needs a name. Here's an example::
|
||||
|
||||
static struct phy_driver dm9161_driver = {
|
||||
.phy_id = 0x0181b880,
|
||||
.name = "Davicom DM9161E",
|
||||
.phy_id_mask = 0x0ffffff0,
|
||||
...
|
||||
}
|
||||
|
||||
Next, you need to specify what features (speed, duplex, autoneg,
|
||||
etc) your PHY device and driver support. Most PHYs support
|
||||
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
|
||||
features.
|
||||
|
||||
Each driver consists of a number of function pointers, documented
|
||||
in include/linux/phy.h under the phy_driver structure.
|
||||
|
||||
Of these, only config_aneg and read_status are required to be
|
||||
assigned by the driver code. The rest are optional. Also, it is
|
||||
preferred to use the generic phy driver's versions of these two
|
||||
functions if at all possible: genphy_read_status and
|
||||
genphy_config_aneg. If this is not possible, it is likely that
|
||||
you only need to perform some actions before and after invoking
|
||||
these functions, and so your functions will wrap the generic
|
||||
ones.
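
As a rough sketch (the register number, value and IDs below are made up, and
my_phy_config_init() stands in for whatever quirk handling the device needs),
such a driver could be as small as::

    #include <linux/module.h>
    #include <linux/phy.h>

    #define MY_PHY_ERRATA_REG 0x1e  /* hypothetical vendor register */
    #define MY_PHY_ERRATA_VAL 0x0002  /* hypothetical workaround value */

    static int my_phy_config_init(struct phy_device *phydev)
    {
            /* Apply a board/silicon quirk before autonegotiation runs. */
            return phy_write(phydev, MY_PHY_ERRATA_REG, MY_PHY_ERRATA_VAL);
    }

    static struct phy_driver my_phy_driver[] = { {
            .phy_id = 0x0181b880,
            .phy_id_mask = 0x0ffffff0,
            .name = "My PHY",
            .features = PHY_BASIC_FEATURES,
            .config_init = my_phy_config_init,
            /* The generic helpers are usually sufficient. */
            .config_aneg = genphy_config_aneg,
            .read_status = genphy_read_status,
    } };
    module_phy_driver(my_phy_driver);

    static struct mdio_device_id __maybe_unused my_phy_tbl[] = {
            { 0x0181b880, 0x0ffffff0 },
            { }
    };
    MODULE_DEVICE_TABLE(mdio, my_phy_tbl);
    MODULE_LICENSE("GPL");
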
|
||||
|
||||
Feel free to look at the Marvell, Cicada, and Davicom drivers in
|
||||
drivers/net/phy/ for examples (the lxt and qsemi drivers have
|
||||
not been tested as of this writing).
|
||||
|
||||
The PHY's MMD register accesses are handled by the PAL framework
|
||||
by default, but can be overridden by a specific PHY driver if
|
||||
required. This could be the case if a PHY was released for
|
||||
manufacturing before the MMD PHY register definitions were
|
||||
standardized by the IEEE. Most modern PHYs will be able to use
|
||||
the generic PAL framework for accessing the PHY's MMD registers.
|
||||
An example of such usage is for Energy Efficient Ethernet support,
|
||||
implemented in the PAL. This support uses the PAL to access MMD
|
||||
registers for EEE query and configuration if the PHY supports
|
||||
the IEEE standard access mechanisms, or can use the PHY's specific
|
||||
access interfaces if overridden by the specific PHY driver. See
|
||||
the Micrel driver in drivers/net/phy/ for an example of how this
|
||||
can be implemented.
|
||||
|
||||
Board Fixups
|
||||
============
|
||||
|
||||
Sometimes the specific interaction between the platform and the PHY requires
|
||||
special handling. For instance, to change where the PHY's clock input is,
|
||||
or to add a delay to account for latency issues in the data path. In order
|
||||
to support such contingencies, the PHY Layer allows platform code to register
|
||||
fixups to be run when the PHY is brought up (or subsequently reset).
|
||||
|
||||
When the PHY Layer brings up a PHY it checks to see if there are any fixups
|
||||
registered for it, matching based on UID (contained in the PHY device's phy_id
|
||||
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
|
||||
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
|
||||
wildcards for the bus ID and UID, respectively.
|
||||
|
||||
When a match is found, the PHY layer will invoke the run function associated
|
||||
with the fixup. This function is passed a pointer to the phy_device of
|
||||
interest. It should therefore only operate on that PHY.
|
||||
|
||||
The platform code can either register the fixup using phy_register_fixup()::
|
||||
|
||||
int phy_register_fixup(const char *phy_id,
|
||||
u32 phy_uid, u32 phy_uid_mask,
|
||||
int (*run)(struct phy_device *));
|
||||
|
||||
Or using one of the two stubs, phy_register_fixup_for_uid() and
|
||||
phy_register_fixup_for_id()::
|
||||
|
||||
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
|
||||
int (*run)(struct phy_device *));
|
||||
int phy_register_fixup_for_id(const char *phy_id,
|
||||
int (*run)(struct phy_device *));
|
||||
|
||||
The stubs set one of the two matching criteria, and set the other one to
|
||||
match anything.
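
For instance, board code that must poke a quirky register on one particular
PHY before it is used could register a fixup like the sketch below (the UID,
mask, register and value are hypothetical)::

    static int my_board_phy_fixup(struct phy_device *phydev)
    {
            /* Hypothetical board quirk: program a vendor-specific register. */
            return phy_write(phydev, 0x1f, 0x0002);
    }

    static int __init my_board_init(void)
    {
            return phy_register_fixup_for_uid(0x0181b880, 0x0ffffff0,
                                              my_board_phy_fixup);
    }
    arch_initcall(my_board_init);
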
|
||||
|
||||
When phy_register_fixup() or \*_for_uid()/\*_for_id() is called from a module,
|
||||
the fixup must be unregistered and its allocated memory freed when the module is unloaded.
|
||||
|
||||
Call one of the following functions before unloading the module::
|
||||
|
||||
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
|
||||
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
|
||||
   int phy_unregister_fixup_for_id(const char *phy_id);
|
||||
|
||||
Standards
|
||||
=========
|
||||
|
||||
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
|
||||
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
|
||||
|
||||
RGMII v1.3:
|
||||
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
|
||||
|
||||
RGMII v2.0:
|
||||
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf
|
|
@ -1,427 +0,0 @@
|
|||
|
||||
-------
|
||||
PHY Abstraction Layer
|
||||
(Updated 2008-04-08)
|
||||
|
||||
Purpose
|
||||
|
||||
Most network devices consist of set of registers which provide an interface
|
||||
to a MAC layer, which communicates with the physical connection through a
|
||||
PHY. The PHY concerns itself with negotiating link parameters with the link
|
||||
partner on the other side of the network connection (typically, an ethernet
|
||||
cable), and provides a register interface to allow drivers to determine what
|
||||
settings were chosen, and to configure what settings are allowed.
|
||||
|
||||
While these devices are distinct from the network devices, and conform to a
|
||||
standard layout for the registers, it has been common practice to integrate
|
||||
the PHY management code with the network driver. This has resulted in large
|
||||
amounts of redundant code. Also, on embedded systems with multiple (and
|
||||
sometimes quite different) ethernet controllers connected to the same
|
||||
management bus, it is difficult to ensure safe use of the bus.
|
||||
|
||||
Since the PHYs are devices, and the management busses through which they are
|
||||
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
|
||||
In doing so, it has these goals:
|
||||
|
||||
1) Increase code-reuse
|
||||
2) Increase overall code-maintainability
|
||||
3) Speed development time for new network drivers, and for new systems
|
||||
|
||||
Basically, this layer is meant to provide an interface to PHY devices which
|
||||
allows network driver writers to write as little code as possible, while
|
||||
still providing a full feature set.
|
||||
|
||||
The MDIO bus
|
||||
|
||||
Most network devices are connected to a PHY by means of a management bus.
|
||||
Different devices use different busses (though some share common interfaces).
|
||||
In order to take advantage of the PAL, each bus interface needs to be
|
||||
registered as a distinct device.
|
||||
|
||||
1) read and write functions must be implemented. Their prototypes are:
|
||||
|
||||
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
|
||||
int read(struct mii_bus *bus, int mii_id, int regnum);
|
||||
|
||||
mii_id is the address on the bus for the PHY, and regnum is the register
|
||||
number. These functions are guaranteed not to be called from interrupt
|
||||
time, so it is safe for them to block, waiting for an interrupt to signal
|
||||
the operation is complete
|
||||
|
||||
2) A reset function is optional. This is used to return the bus to an
|
||||
initialized state.
|
||||
|
||||
3) A probe function is needed. This function should set up anything the bus
|
||||
driver needs, setup the mii_bus structure, and register with the PAL using
|
||||
mdiobus_register. Similarly, there's a remove function to undo all of
|
||||
that (use mdiobus_unregister).
|
||||
|
||||
4) Like any driver, the device_driver structure must be configured, and init
|
||||
exit functions are used to register the driver.
|
||||
|
||||
5) The bus must also be declared somewhere as a device, and registered.
|
||||
|
||||
As an example for how one driver implemented an mdio bus driver, see
|
||||
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
|
||||
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
|
||||
|
||||
(RG)MII/electrical interface considerations
|
||||
|
||||
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
|
||||
electrical signal interface using a synchronous 125Mhz clock signal and several
|
||||
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
|
||||
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
|
||||
sink) have enough setup and hold times to sample the data lines correctly. The
|
||||
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
|
||||
the PHY driver and optionally the MAC driver, implement the required delay. The
|
||||
values of phy_interface_t must be understood from the perspective of the PHY
|
||||
device itself, leading to the following:
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
|
||||
internal delay by itself, it assumes that either the Ethernet MAC (if capable
|
||||
or the PCB traces) insert the correct 1.5-2ns delay
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
|
||||
for the transmit data lines (TXD[3:0]) processed by the PHY device
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
|
||||
for the receive data lines (RXD[3:0]) processed by the PHY device
|
||||
|
||||
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
|
||||
both transmit AND receive data lines from/to the PHY device
|
||||
|
||||
Whenever possible, use the PHY side RGMII delay for these reasons:
|
||||
|
||||
* PHY devices may offer sub-nanosecond granularity in how they allow a
|
||||
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
|
||||
precision may be required to account for differences in PCB trace lengths
|
||||
|
||||
* PHY devices are typically qualified for a large range of applications
|
||||
(industrial, medical, automotive...), and they provide a constant and
|
||||
reliable delay across temperature/pressure/voltage ranges
|
||||
|
||||
* PHY device drivers in PHYLIB being reusable by nature, being able to
|
||||
configure correctly a specified delay enables more designs with similar delay
|
||||
requirements to be operate correctly
|
||||
|
||||
For cases where the PHY is not capable of providing this delay, but the
|
||||
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
|
||||
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
|
||||
configured correctly in order to provide the required transmit and/or receive
|
||||
side delay from the perspective of the PHY device. Conversely, if the Ethernet
|
||||
MAC driver looks at the phy_interface_t value, for any other mode but
|
||||
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
|
||||
disabled.
|
||||
|
||||
In case neither the Ethernet MAC, nor the PHY are capable of providing the
|
||||
required delays, as defined per the RGMII standard, several options may be
|
||||
available:
|
||||
|
||||
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
|
||||
set of pins'strength, delays, and voltage; and it may be a suitable
|
||||
option to insert the expected 2ns RGMII delay.
|
||||
|
||||
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
|
||||
designed serpentine), which may not require software configuration at all.
|
||||
|
||||
Common problems with RGMII delay mismatch
|
||||
|
||||
When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this
|
||||
will most likely result in the clock and data line signals to be unstable when
|
||||
the PHY or MAC take a snapshot of these signals to translate them into logical
|
||||
1 or 0 states and reconstruct the data being transmitted/received. Typical
|
||||
symptoms include:
|
||||
|
||||
* Transmission/reception partially works, and there is frequent or occasional
|
||||
packet loss observed
|
||||
|
||||
* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
|
||||
or just discard them all
|
||||
|
||||
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
|
||||
(since there is enough setup/hold time in that case)
|
||||
|
||||
|
||||
Connecting to a PHY
|
||||
|
||||
Sometime during startup, the network driver needs to establish a connection
|
||||
between the PHY device, and the network device. At this time, the PHY's bus
|
||||
and drivers need to all have been loaded, so it is ready for the connection.
|
||||
At this point, there are several ways to connect to the PHY:
|
||||
|
||||
1) The PAL handles everything, and only calls the network driver when
|
||||
the link state changes, so it can react.
|
||||
|
||||
2) The PAL handles everything except interrupts (usually because the
|
||||
controller has the interrupt registers).
|
||||
|
||||
3) The PAL handles everything, but checks in with the driver every second,
|
||||
allowing the network driver to react first to any changes before the PAL
|
||||
does.
|
||||
|
||||
4) The PAL serves only as a library of functions, with the network device
|
||||
manually calling functions to update status, and configure the PHY
|
||||
|
||||
|
||||
Letting the PHY Abstraction Layer do Everything
|
||||
|
||||
If you choose option 1 (The hope is that every driver can, but to still be
|
||||
useful to drivers that can't), connecting to the PHY is simple:
|
||||
|
||||
First, you need a function to react to changes in the link state. This
|
||||
function follows this protocol:
|
||||
|
||||
static void adjust_link(struct net_device *dev);
|
||||
|
||||
Next, you need to know the device name of the PHY connected to this device.
|
||||
The name will look something like, "0:00", where the first number is the
|
||||
bus id, and the second is the PHY's address on that bus. Typically,
|
||||
the bus is responsible for making its ID unique.
|
||||
|
||||
Now, to connect, just call this function:
|
||||
|
||||
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
|
||||
|
||||
phydev is a pointer to the phy_device structure which represents the PHY. If
|
||||
phy_connect is successful, it will return the pointer. dev, here, is the
|
||||
pointer to your net_device. Once done, this function will have started the
|
||||
PHY's software state machine, and registered for the PHY's interrupt, if it
|
||||
has one. The phydev structure will be populated with information about the
|
||||
current state, though the PHY will not yet be truly operational at this
|
||||
point.
|
||||
|
||||
PHY-specific flags should be set in phydev->dev_flags prior to the call
|
||||
to phy_connect() such that the underlying PHY driver can check for flags
|
||||
and perform specific operations based on them.
|
||||
This is useful if the system has put hardware restrictions on
|
||||
the PHY/controller, of which the PHY needs to be aware.
|
||||
|
||||
interface is a u32 which specifies the connection type used
|
||||
between the controller and the PHY. Examples are GMII, MII,
|
||||
RGMII, and SGMII. For a full list, see include/linux/phy.h
|
||||
|
||||
Now just make sure that phydev->supported and phydev->advertising have any
|
||||
values pruned from them which don't make sense for your controller (a 10/100
|
||||
controller may be connected to a gigabit capable PHY, so you would need to
|
||||
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
|
||||
for these bitfields. Note that you should not SET any bits, except the
|
||||
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
|
||||
put into an unsupported state.
|
||||
|
||||
Lastly, once the controller is ready to handle network traffic, you call
|
||||
phy_start(phydev). This tells the PAL that you are ready, and configures the
|
||||
PHY to connect to the network. If you want to handle your own interrupts,
|
||||
just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
|
||||
Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
|
||||
|
||||
When you want to disconnect from the network (even if just briefly), you call
|
||||
phy_stop(phydev).
|
||||
|
||||
Pause frames / flow control
|
||||
|
||||
The PHY does not participate directly in flow control/pause frames except by
|
||||
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
|
||||
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
|
||||
controller supports such a thing. Since flow control/pause frames generation
|
||||
involves the Ethernet MAC driver, it is recommended that this driver takes care
|
||||
of properly indicating advertisement and support for such features by setting
|
||||
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
|
||||
either before or after phy_connect() and/or as a result of implementing the
|
||||
ethtool::set_pauseparam feature.
|
||||
|
||||
|
||||
Keeping Close Tabs on the PAL
|
||||
|
||||
It is possible that the PAL's built-in state machine needs a little help to
|
||||
keep your network device and the PHY properly in sync. If so, you can
|
||||
register a helper function when connecting to the PHY, which will be called
|
||||
every second before the state machine reacts to any changes. To do this, you
|
||||
need to manually call phy_attach() and phy_prepare_link(), and then call
|
||||
phy_start_machine() with the second argument set to point to your special
|
||||
handler.
|
||||
|
||||
Currently there are no examples of how to use this functionality, and testing
|
||||
on it has been limited because the author does not have any drivers which use
|
||||
it (they all use option 1). So Caveat Emptor.
|
||||
|
||||
Doing it all yourself
|
||||
|
||||
There's a remote chance that the PAL's built-in state machine cannot track
|
||||
the complex interactions between the PHY and your network device. If this is
|
||||
so, you can simply call phy_attach(), and not call phy_start_machine or
|
||||
phy_prepare_link(). This will mean that phydev->state is entirely yours to
|
||||
handle (phy_start and phy_stop toggle between some of the states, so you
|
||||
might need to avoid them).
|
||||
|
||||
An effort has been made to make sure that useful functionality can be
|
||||
accessed without the state-machine running, and most of these functions are
|
||||
descended from functions which did not interact with a complex state-machine.
|
||||
However, again, no effort has been made so far to test running without the
|
||||
state machine, so tryer beware.
|
||||
|
||||
Here is a brief rundown of the functions:
|
||||
|
||||
int phy_read(struct phy_device *phydev, u16 regnum);
|
||||
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
|
||||
|
||||
Simple read/write primitives. They invoke the bus's read/write function
|
||||
pointers.
|
||||
|
||||
void phy_print_status(struct phy_device *phydev);
|
||||
|
||||
A convenience function to print out the PHY status neatly.
|
||||
|
||||
int phy_start_interrupts(struct phy_device *phydev);
|
||||
int phy_stop_interrupts(struct phy_device *phydev);
|
||||
|
||||
Requests the IRQ for the PHY interrupts, then enables them for
|
||||
start, or disables then frees them for stop.
|
||||
|
||||
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
|
||||
phy_interface_t interface);
|
||||
|
||||
Attaches a network device to a particular PHY, binding the PHY to a generic
|
||||
driver if none was found during bus initialization.
|
||||
|
||||
int phy_start_aneg(struct phy_device *phydev);
|
||||
|
||||
Using variables inside the phydev structure, either configures advertising
|
||||
and resets autonegotiation, or disables autonegotiation, and configures
|
||||
forced settings.
|
||||
|
||||
static inline int phy_read_status(struct phy_device *phydev);
|
||||
|
||||
Fills the phydev structure with up-to-date information about the current
|
||||
settings in the PHY.
|
||||
|
||||
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
|
||||
|
||||
Ethtool convenience functions.
|
||||
|
||||
int phy_mii_ioctl(struct phy_device *phydev,
|
||||
struct mii_ioctl_data *mii_data, int cmd);
|
||||
|
||||
The MII ioctl. Note that this function will completely screw up the state
|
||||
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
|
||||
use this only to write registers which are not standard, and don't set off
|
||||
a renegotiation.
|
||||
|
||||
|
||||
PHY Device Drivers
|
||||
|
||||
With the PHY Abstraction Layer, adding support for new PHYs is
|
||||
quite easy. In some cases, no work is required at all! However,
|
||||
many PHYs require a little hand-holding to get up-and-running.
|
||||
|
||||
Generic PHY driver
|
||||
|
||||
If the desired PHY doesn't have any errata, quirks, or special
|
||||
features you want to support, then it may be best to not add
|
||||
support, and let the PHY Abstraction Layer's Generic PHY Driver
|
||||
do all of the work.
|
||||
|
||||
Writing a PHY driver
|
||||
|
||||
If you do need to write a PHY driver, the first thing to do is
|
||||
make sure it can be matched with an appropriate PHY device.
|
||||
This is done during bus initialization by reading the device's
|
||||
UID (stored in registers 2 and 3), then comparing it to each
|
||||
driver's phy_id field by ANDing it with each driver's
|
||||
phy_id_mask field. Also, it needs a name. Here's an example:
|
||||
|
||||
static struct phy_driver dm9161_driver = {
|
||||
.phy_id = 0x0181b880,
|
||||
.name = "Davicom DM9161E",
|
||||
.phy_id_mask = 0x0ffffff0,
|
||||
...
|
||||
}
|
||||
|
||||
Next, you need to specify what features (speed, duplex, autoneg,
|
||||
etc) your PHY device and driver support. Most PHYs support
|
||||
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
|
||||
features.
|
||||
|
||||
Each driver consists of a number of function pointers, documented
|
||||
in include/linux/phy.h under the phy_driver structure.
|
||||
|
||||
Of these, only config_aneg and read_status are required to be
|
||||
assigned by the driver code. The rest are optional. Also, it is
|
||||
preferred to use the generic phy driver's versions of these two
|
||||
functions if at all possible: genphy_read_status and
|
||||
genphy_config_aneg. If this is not possible, it is likely that
|
||||
you only need to perform some actions before and after invoking
|
||||
these functions, and so your functions will wrap the generic
|
||||
ones.
|
||||
|
||||
Feel free to look at the Marvell, Cicada, and Davicom drivers in
|
||||
drivers/net/phy/ for examples (the lxt and qsemi drivers have
|
||||
not been tested as of this writing).
|
||||
|
||||
The PHY's MMD register accesses are handled by the PAL framework
|
||||
by default, but can be overridden by a specific PHY driver if
|
||||
required. This could be the case if a PHY was released for
|
||||
manufacturing before the MMD PHY register definitions were
|
||||
standardized by the IEEE. Most modern PHYs will be able to use
|
||||
the generic PAL framework for accessing the PHY's MMD registers.
|
||||
An example of such usage is for Energy Efficient Ethernet support,
|
||||
implemented in the PAL. This support uses the PAL to access MMD
|
||||
registers for EEE query and configuration if the PHY supports
|
||||
the IEEE standard access mechanisms, or can use the PHY's specific
|
||||
access interfaces if overridden by the specific PHY driver. See
|
||||
the Micrel driver in drivers/net/phy/ for an example of how this
|
||||
can be implemented.
|
||||
|
||||
Board Fixups
|
||||
|
||||
Sometimes the specific interaction between the platform and the PHY requires
|
||||
special handling. For instance, to change where the PHY's clock input is,
|
||||
or to add a delay to account for latency issues in the data path. In order
|
||||
to support such contingencies, the PHY Layer allows platform code to register
|
||||
fixups to be run when the PHY is brought up (or subsequently reset).
|
||||
|
||||
When the PHY Layer brings up a PHY it checks to see if there are any fixups
|
||||
registered for it, matching based on UID (contained in the PHY device's phy_id
|
||||
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
|
||||
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
|
||||
wildcards for the bus ID and UID, respectively.
|
||||
|
||||
When a match is found, the PHY layer will invoke the run function associated
|
||||
with the fixup. This function is passed a pointer to the phy_device of
|
||||
interest. It should therefore only operate on that PHY.
|
||||
|
||||
The platform code can either register the fixup using phy_register_fixup():
|
||||
|
||||
int phy_register_fixup(const char *phy_id,
|
||||
u32 phy_uid, u32 phy_uid_mask,
|
||||
int (*run)(struct phy_device *));
|
||||
|
||||
Or using one of the two stubs, phy_register_fixup_for_uid() and
|
||||
phy_register_fixup_for_id():
|
||||
|
||||
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
|
||||
int (*run)(struct phy_device *));
|
||||
int phy_register_fixup_for_id(const char *phy_id,
|
||||
int (*run)(struct phy_device *));
|
||||
|
||||
The stubs set one of the two matching criteria, and set the other one to
|
||||
match anything.
|
||||
|
||||
When phy_register_fixup() or *_for_uid()/*_for_id() is called at module,
|
||||
unregister fixup and free allocate memory are required.
|
||||
|
||||
Call one of following function before unloading module.
|
||||
|
||||
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
|
||||
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
|
||||
int phy_register_fixup_for_id(const char *phy_id);
|
||||
|
||||
Standards
|
||||
|
||||
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
|
||||
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
|
||||
|
||||
RGMII v1.3:
|
||||
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
|
||||
|
||||
RGMII v2.0:
|
||||
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf
|
|
@ -1000,51 +1000,6 @@ The kernel interface functions are as follows:
|
|||
size should be set when the call is begun. tx_total_len may not be less
|
||||
than zero.
|
||||
|
||||
(*) Check to see the completion state of a call so that the caller can assess
|
||||
whether it needs to be retried.
|
||||
|
||||
enum rxrpc_call_completion {
|
||||
RXRPC_CALL_SUCCEEDED,
|
||||
RXRPC_CALL_REMOTELY_ABORTED,
|
||||
RXRPC_CALL_LOCALLY_ABORTED,
|
||||
RXRPC_CALL_LOCAL_ERROR,
|
||||
RXRPC_CALL_NETWORK_ERROR,
|
||||
};
|
||||
|
||||
int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
|
||||
enum rxrpc_call_completion *_compl,
|
||||
u32 *_abort_code);
|
||||
|
||||
On return, -EINPROGRESS will be returned if the call is still ongoing; if
|
||||
it is finished, *_compl will be set to indicate the manner of completion,
|
||||
*_abort_code will be set to any abort code that occurred. 0 will be
|
||||
returned on a successful completion, -ECONNABORTED will be returned if the
|
||||
client failed due to a remote abort and anything else will return an
|
||||
appropriate error code.
|
||||
|
||||
The caller should look at this information to decide if it's worth
|
||||
retrying the call.
|
||||
|
||||
(*) Retry a client call.
|
||||
|
||||
int rxrpc_kernel_retry_call(struct socket *sock,
|
||||
struct rxrpc_call *call,
|
||||
struct sockaddr_rxrpc *srx,
|
||||
struct key *key);
|
||||
|
||||
This attempts to partially reinitialise a call and submit it again while
|
||||
reusing the original call's Tx queue to avoid the need to repackage and
|
||||
re-encrypt the data to be sent. call indicates the call to retry, srx the
|
||||
new address to send it to and key the encryption key to use for signing or
|
||||
encrypting the packets.
|
||||
|
||||
For this to work, the first Tx data packet must still be in the transmit
|
||||
queue, and currently this is only permitted for local and network errors
|
||||
and the call must not have been aborted. Any partially constructed Tx
|
||||
packet is left as is and can continue being filled afterwards.
|
||||
|
||||
It returns 0 if the call was requeued and an error otherwise.
|
||||
|
||||
(*) Get call RTT.
|
||||
|
||||
u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);
|
||||
|
|
|
@ -366,6 +366,27 @@ to the accept queue.
|
|||
|
||||
TCP Fast Open
|
||||
=============
|
||||
* TcpEstabResets
|
||||
Defined in `RFC1213 tcpEstabResets`_.
|
||||
|
||||
.. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48
|
||||
|
||||
* TcpAttemptFails
|
||||
Defined in `RFC1213 tcpAttemptFails`_.
|
||||
|
||||
.. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48
|
||||
|
||||
* TcpOutRsts
|
||||
Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates
|
||||
  the 'segments sent containing the RST flag', but in the Linux kernel, this
|
||||
  counter indicates the segments the kernel tried to send. The sending
|
||||
  process might fail due to some errors (e.g. memory allocation failure).
|
||||
|
||||
.. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52
|
||||
|
||||
|
||||
TCP Fast Path
|
||||
=============
|
||||
When the kernel receives a TCP packet, it has two paths to handle the
|
||||
packet: one is the fast path, the other is the slow path. The comment in the kernel
|
||||
code provides a good explanation of them; it is pasted below::
|
||||
|
@ -413,7 +434,6 @@ increase 1.
|
|||
|
||||
TCP abort
|
||||
=========
|
||||
|
||||
* TcpExtTCPAbortOnData
|
||||
|
||||
It means TCP layer has data in flight, but need to close the
|
||||
|
@ -589,7 +609,6 @@ packet yet, the sender would know packet 4 is out of order. The TCP
|
|||
stack of kernel will increase TcpExtTCPSACKReorder for both of the
|
||||
above scenarios.
|
||||
|
||||
|
||||
DSACK
|
||||
=====
|
||||
The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report
|
||||
|
@ -612,8 +631,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a
|
|||
DSACK to the sender.
|
||||
|
||||
* TcpExtTCPDSACKRecv
|
||||
|
||||
The TCP stack receives a DSACK, which indicate an acknowledged
|
||||
The TCP stack receives a DSACK, which indicates an acknowledged
|
||||
duplicate packet is received.
|
||||
|
||||
* TcpExtTCPDSACKOfoRecv
|
||||
|
@ -621,6 +639,56 @@ duplicate packet is received.
|
|||
The TCP stack receives a DSACK, which indicates an out of order
|
||||
duplicate packet is received.
|
||||
|
||||
invalid SACK and DSACK
|
||||
======================
|
||||
When a SACK (or DSACK) block is invalid, a corresponding counter would
|
||||
be updated. The validation method is based on the start/end sequence
|
||||
number of the SACK block. For more details, please refer to the comment
|
||||
of the function tcp_is_sackblock_valid in the kernel source code. A
|
||||
SACK option could have up to 4 blocks, and they are checked
|
||||
individually. E.g., if 3 blocks of a SACK are invalid, the
|
||||
corresponding counter would be updated 3 times. The comment of the
|
||||
`Add counters for discarded SACK blocks`_ patch has additional
|
||||
explanation:
|
||||
|
||||
.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32
|
||||
|
||||
* TcpExtTCPSACKDiscard
|
||||
This counter indicates how many SACK blocks are invalid. If the invalid
|
||||
SACK block is caused by ACK recording, the TCP stack will only ignore
|
||||
it and won't update this counter.
|
||||
|
||||
* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo
|
||||
When a DSACK block is invalid, one of these two counters would be
|
||||
updated. Which counter will be updated depends on the undo_marker flag
|
||||
of the TCP socket. If the undo_marker is not set, the TCP stack isn't
|
||||
  likely to have re-transmitted any packets. If we still receive an invalid
|
||||
  DSACK block, the reason might be that the packet was duplicated in the
|
||||
  middle of the network. In such a scenario, TcpExtTCPDSACKIgnoredNoUndo
|
||||
will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld
|
||||
will be updated. As implied in its name, it might be an old packet.
|
||||
|
||||
SACK shift
|
||||
==========
|
||||
The linux networking stack stores data in sk_buff struct (skb for
|
||||
short). If a SACK block crosses multiple skbs, the TCP stack will try
|
||||
to re-arrange data in these skbs. E.g. if a SACK block acknowledges seq
|
||||
10 to 15, skb1 has seq 10 to 13, skb2 has seq 14 to 20. The seq 14 and
|
||||
15 in skb2 would be moved to skb1. This operation is 'shift'. If a
|
||||
SACK block acknowledges seq 10 to 20, skb1 has seq 10 to 13, skb2 has
|
||||
seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be
|
||||
discarded; this operation is 'merge'.
|
||||
|
||||
* TcpExtTCPSackShifted
|
||||
A skb is shifted
|
||||
|
||||
* TcpExtTCPSackMerged
|
||||
A skb is merged
|
||||
|
||||
* TcpExtTCPSackShiftFallback
|
||||
A skb should be shifted or merged, but the TCP stack doesn't do it for
|
||||
  some reason.
|
||||
|
||||
TCP out of order
|
||||
================
|
||||
* TcpExtTCPOFOQueue
|
||||
|
@ -721,6 +789,60 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_).
|
|||
.. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9
|
||||
.. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11
|
||||
|
||||
TCP receive window
|
||||
==================
|
||||
* TcpExtTCPWantZeroWindowAdv
|
||||
Depending on current memory usage, the TCP stack tries to set receive
|
||||
  window to zero. But the receive window might still be a non-zero
|
||||
value. For example, if the previous window size is 10, and the TCP
|
||||
stack receives 3 bytes, the current window size would be 7 even if the
|
||||
window size calculated by the memory usage is zero.
|
||||
|
||||
* TcpExtTCPToZeroWindowAdv
|
||||
  The TCP receive window is set to zero from a non-zero value.
|
||||
|
||||
* TcpExtTCPFromZeroWindowAdv
|
||||
  The TCP receive window is set to a non-zero value from zero.
|
||||
|
||||
|
||||
Delayed ACK
|
||||
===========
|
||||
The TCP Delayed ACK is a technique which is used for reducing the
|
||||
packet count in the network. For more details, please refer to the
|
||||
`Delayed ACK wiki`_
|
||||
|
||||
.. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment
|
||||
|
||||
* TcpExtDelayedACKs
|
||||
A delayed ACK timer expires. The TCP stack will send a pure ACK packet
|
||||
and exit the delayed ACK mode.
|
||||
|
||||
* TcpExtDelayedACKLocked
|
||||
A delayed ACK timer expires, but the TCP stack can't send an ACK
|
||||
  immediately because the socket is locked by a userspace program. The
|
||||
TCP stack will send a pure ACK later (after the userspace program
|
||||
  unlocks the socket). When the TCP stack sends the pure ACK later, the
|
||||
TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK
|
||||
mode.
|
||||
|
||||
* TcpExtDelayedACKLost
|
||||
It will be updated when the TCP stack receives a packet which has been
|
||||
ACKed. A Delayed ACK loss might cause this issue, but it would also be
|
||||
  triggered by other reasons, such as a packet being duplicated in the
|
||||
network.
|
||||
|
||||
Tail Loss Probe (TLP)
|
||||
=====================
|
||||
TLP is an algorithm which is used to detect TCP packet loss. For more
|
||||
details, please refer to the `TLP paper`_.
|
||||
|
||||
.. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01
|
||||
|
||||
* TcpExtTCPLossProbes
|
||||
A TLP probe packet is sent.
|
||||
|
||||
* TcpExtTCPLossProbeRecovery
|
||||
A packet loss is detected and recovered by TLP.
|
||||
|
||||
examples
|
||||
========
|
||||
|
|
|
@ -417,7 +417,7 @@ is again deprecated and ts[2] holds a hardware timestamp if set.
|
|||
|
||||
Hardware time stamping must also be initialized for each device driver
|
||||
that is expected to do hardware time stamping. The parameter is defined in
|
||||
/include/linux/net_tstamp.h as:
|
||||
include/uapi/linux/net_tstamp.h as:
|
||||
|
||||
struct hwtstamp_config {
|
||||
int flags; /* no flags defined right now, must be zero */
|
||||
|
@ -487,7 +487,7 @@ enum {
|
|||
HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
|
||||
|
||||
/* for the complete list of values, please check
|
||||
* the include file /include/linux/net_tstamp.h
|
||||
* the include file include/uapi/linux/net_tstamp.h
|
||||
*/
|
||||
};
|
||||
|
||||
|
|
|
@ -291,6 +291,20 @@ user space is responsible for creating them if needed.
|
|||
|
||||
Default : 0 (for compatibility reasons)
|
||||
|
||||
devconf_inherit_init_net
|
||||
----------------------------
|
||||
|
||||
Controls if a new network namespace should inherit all current
|
||||
settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
|
||||
default, we keep the current behavior: for IPv4 we inherit all current
|
||||
settings from init_net and for IPv6 we reset all settings to default.
|
||||
|
||||
If set to 1, both IPv4 and IPv6 settings are forced to inherit from
|
||||
current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
|
||||
forced to reset to their default values.
|
||||
|
||||
Default : 0 (for compatibility reasons)
|
||||
|
||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||
-------------------------------------------------------
|
||||
|
||||
|
|
MAINTAINERS
|
@ -3052,8 +3052,8 @@ F: include/linux/bcm963xx_nvram.h
|
|||
F: include/linux/bcm963xx_tag.h
|
||||
|
||||
BROADCOM BNX2 GIGABIT ETHERNET DRIVER
|
||||
M: Rasesh Mody <rasesh.mody@cavium.com>
|
||||
M: Dept-GELinuxNICDev@cavium.com
|
||||
M: Rasesh Mody <rmody@marvell.com>
|
||||
M: GR-Linux-NIC-Dev@marvell.com
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/net/ethernet/broadcom/bnx2.*
|
||||
|
@ -3072,9 +3072,9 @@ S: Supported
|
|||
F: drivers/scsi/bnx2i/
|
||||
|
||||
BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
|
||||
M: Ariel Elior <ariel.elior@cavium.com>
|
||||
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
|
||||
M: everest-linux-l2@cavium.com
|
||||
M: Ariel Elior <aelior@marvell.com>
|
||||
M: Sudarsana Kalluru <skalluru@marvell.com>
|
||||
M: GR-everest-linux-l2@marvell.com
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/net/ethernet/broadcom/bnx2x/
|
||||
|
@ -3249,9 +3249,9 @@ S: Supported
|
|||
F: drivers/scsi/bfa/
|
||||
|
||||
BROCADE BNA 10 GIGABIT ETHERNET DRIVER
|
||||
M: Rasesh Mody <rasesh.mody@cavium.com>
|
||||
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
|
||||
M: Dept-GELinuxNICDev@cavium.com
|
||||
M: Rasesh Mody <rmody@marvell.com>
|
||||
M: Sudarsana Kalluru <skalluru@marvell.com>
|
||||
M: GR-Linux-NIC-Dev@marvell.com
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/net/ethernet/brocade/bna/
|
||||
|
@ -3471,10 +3471,9 @@ F: drivers/i2c/busses/i2c-octeon*
|
|||
F: drivers/i2c/busses/i2c-thunderx*
|
||||
|
||||
CAVIUM LIQUIDIO NETWORK DRIVER
|
||||
M: Derek Chickles <derek.chickles@caviumnetworks.com>
|
||||
M: Satanand Burla <satananda.burla@caviumnetworks.com>
|
||||
M: Felix Manlunas <felix.manlunas@caviumnetworks.com>
|
||||
M: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
|
||||
M: Derek Chickles <dchickles@marvell.com>
|
||||
M: Satanand Burla <sburla@marvell.com>
|
||||
M: Felix Manlunas <fmanlunas@marvell.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.cavium.com
|
||||
S: Supported
|
||||
|
@ -3979,6 +3978,7 @@ F: drivers/cpufreq/arm_big_little.c
|
|||
CPU POWER MONITORING SUBSYSTEM
|
||||
M: Thomas Renninger <trenn@suse.com>
|
||||
M: Shuah Khan <shuah@kernel.org>
|
||||
M: Shuah Khan <skhan@linuxfoundation.org>
|
||||
L: linux-pm@vger.kernel.org
|
||||
S: Maintained
|
||||
F: tools/power/cpupower/
|
||||
|
@ -4123,7 +4123,7 @@ S: Maintained
|
|||
F: drivers/media/dvb-frontends/cxd2820r*
|
||||
|
||||
CXGB3 ETHERNET DRIVER (CXGB3)
|
||||
M: Arjun Vynipadath <arjun@chelsio.com>
|
||||
M: Vishal Kulkarni <vishal@chelsio.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.chelsio.com
|
||||
S: Supported
|
||||
|
@ -4152,7 +4152,7 @@ S: Supported
|
|||
F: drivers/crypto/chelsio
|
||||
|
||||
CXGB4 ETHERNET DRIVER (CXGB4)
|
||||
M: Arjun Vynipadath <arjun@chelsio.com>
|
||||
M: Vishal Kulkarni <vishal@chelsio.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.chelsio.com
|
||||
S: Supported
|
||||
|
@ -6024,6 +6024,12 @@ L: linuxppc-dev@lists.ozlabs.org
|
|||
S: Maintained
|
||||
F: drivers/dma/fsldma.*
|
||||
|
||||
FREESCALE ENETC ETHERNET DRIVERS
|
||||
M: Claudiu Manoil <claudiu.manoil@nxp.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/ethernet/freescale/enetc/
|
||||
|
||||
FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
|
||||
M: Claudiu Manoil <claudiu.manoil@nxp.com>
|
||||
L: netdev@vger.kernel.org
|
||||
|
@ -6088,6 +6094,7 @@ M: Yangbo Lu <yangbo.lu@nxp.com>
|
|||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/ptp/ptp_qoriq.c
|
||||
F: drivers/ptp/ptp_qoriq_debugfs.c
|
||||
F: include/linux/fsl/ptp_qoriq.h
|
||||
F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
|
||||
|
||||
|
@@ -8259,6 +8266,7 @@ F: include/uapi/linux/sunrpc/

KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
Q: https://patchwork.kernel.org/project/linux-kselftest/list/
@@ -10690,9 +10698,9 @@ S: Maintained
F: drivers/net/netdevsim/*

NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manish.chopra@cavium.com>
M: Rahul Verma <rahul.verma@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: Rahul Verma <rahulv@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/netxen/
@@ -12476,8 +12484,8 @@ S: Supported
F: drivers/scsi/qedi/

QLOGIC QL4xxx ETHERNET DRIVER
M: Ariel Elior <Ariel.Elior@cavium.com>
M: everest-linux-l2@cavium.com
M: Ariel Elior <aelior@marvell.com>
M: GR-everest-linux-l2@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qed/
@@ -12485,8 +12493,8 @@ F: include/linux/qed/
F: drivers/net/ethernet/qlogic/qede/

QLOGIC QL4xxx RDMA DRIVER
M: Michal Kalderon <Michal.Kalderon@cavium.com>
M: Ariel Elior <Ariel.Elior@cavium.com>
M: Michal Kalderon <mkalderon@marvell.com>
M: Ariel Elior <aelior@marvell.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qedr/
@@ -12506,7 +12514,7 @@ F: Documentation/scsi/LICENSE.qla2xxx
F: drivers/scsi/qla2xxx/

QLOGIC QLA3XXX NETWORK DRIVER
M: Dept-GELinuxNICDev@cavium.com
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/device_drivers/qlogic/LICENSE.qla3xxx
@@ -12520,16 +12528,16 @@ F: Documentation/scsi/LICENSE.qla4xxx
F: drivers/scsi/qla4xxx/

QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
M: Shahed Shaikh <Shahed.Shaikh@cavium.com>
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Shahed Shaikh <shshaikh@marvell.com>
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlcnic/

QLOGIC QLGE 10Gb ETHERNET DRIVER
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlge/
@@ -12609,6 +12617,14 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/qualcomm/emac/

QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <vkoul@kernel.org>
M: Niklas Cassel <niklas.cassel@linaro.org>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
F: Documentation/devicetree/bindings/net/qcom,dwmac.txt

QUALCOMM GENERIC INTERFACE I2C DRIVER
M: Alok Chauhan <alokc@codeaurora.org>
M: Karthikeyan Ramasubramanian <kramasub@codeaurora.org>
@@ -15843,6 +15859,7 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-usb@vger.kernel.org
S: Maintained
F: Documentation/usb/usbip_protocol.txt
@@ -16672,6 +16689,24 @@ T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/tuners/tuner-xc2028.*

XDP (eXpress Data Path)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: David S. Miller <davem@davemloft.net>
M: Jakub Kicinski <jakub.kicinski@netronome.com>
M: Jesper Dangaard Brouer <hawk@kernel.org>
M: John Fastabend <john.fastabend@gmail.com>
L: netdev@vger.kernel.org
L: xdp-newbies@vger.kernel.org
S: Supported
F: net/core/xdp.c
F: include/net/xdp.h
F: kernel/bpf/devmap.c
F: kernel/bpf/cpumap.c
F: include/trace/events/xdp.h
K: xdp
N: xdp

XDP SOCKETS (AF_XDP)
M: Björn Töpel <bjorn.topel@intel.com>
M: Magnus Karlsson <magnus.karlsson@intel.com>
Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 0
SUBLEVEL = 0
EXTRAVERSION = -rc2
EXTRAVERSION = -rc4
NAME = Shy Crocodile

# *DOCUMENTATION*
@@ -955,6 +955,7 @@ ifdef CONFIG_STACK_VALIDATION
endif
endif

PHONY += prepare0

ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
@@ -1061,8 +1062,7 @@ scripts: scripts_basic scripts_dtc
# archprepare is used in arch Makefiles and when processed asm symlink,
# version.h and scripts_basic is processed / created.

# Listed in dependency order
PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
PHONY += prepare archprepare prepare1 prepare2 prepare3

# prepare3 is used to check if we are building in a separate output directory,
# and if so do:
@@ -1360,11 +1360,11 @@ mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS))
mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
mrproper-dirs := $(addprefix _mrproper_,scripts)

PHONY += $(mrproper-dirs) mrproper archmrproper
PHONY += $(mrproper-dirs) mrproper
$(mrproper-dirs):
$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)

mrproper: clean archmrproper $(mrproper-dirs)
mrproper: clean $(mrproper-dirs)
$(call cmd,rmdirs)
$(call cmd,rmfiles)

@@ -3,23 +3,19 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
generic-y += ftrace.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += topology.h

@ -216,6 +216,14 @@ struct bcr_fp_arcv2 {
|
|||
#endif
|
||||
};
|
||||
|
||||
struct bcr_actionpoint {
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
unsigned int pad:21, min:1, num:2, ver:8;
|
||||
#else
|
||||
unsigned int ver:8, num:2, min:1, pad:21;
|
||||
#endif
|
||||
};
|
||||
|
||||
#include <soc/arc/timers.h>
|
||||
|
||||
struct bcr_bpu_arcompact {
|
||||
|
@ -283,7 +291,7 @@ struct cpuinfo_arc_cache {
|
|||
};
|
||||
|
||||
struct cpuinfo_arc_bpu {
|
||||
unsigned int ver, full, num_cache, num_pred;
|
||||
unsigned int ver, full, num_cache, num_pred, ret_stk;
|
||||
};
|
||||
|
||||
struct cpuinfo_arc_ccm {
|
||||
|
@ -302,7 +310,7 @@ struct cpuinfo_arc {
|
|||
struct {
|
||||
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
|
||||
fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
|
||||
debug:1, ap:1, smart:1, rtt:1, pad3:4,
|
||||
ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
|
||||
timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
|
||||
} extn;
|
||||
struct bcr_mpy extn_mpy;
|
||||
|
|
|
@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
|
|||
/*
|
||||
* __ffs: Similar to ffs, but zero based (0-31)
|
||||
*/
|
||||
static inline __attribute__ ((const)) int __ffs(unsigned long word)
|
||||
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
|
||||
{
|
||||
if (!word)
|
||||
return word;
|
||||
|
@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
|
|||
/*
|
||||
* __ffs: Similar to ffs, but zero based (0-31)
|
||||
*/
|
||||
static inline __attribute__ ((const)) int __ffs(unsigned long x)
|
||||
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
|
||||
{
|
||||
int n;
|
||||
unsigned long n;
|
||||
|
||||
asm volatile(
|
||||
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */
|
||||
|
|
|
@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
|
|||
|
||||
/* counts condition */
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
|
||||
/* All jump instructions that are taken */
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
|
||||
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
|
||||
#ifdef CONFIG_ISA_ARCV2
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
|
||||
|
|
|
@ -1,15 +1,10 @@
|
|||
/*
|
||||
* Linux performance counter support for ARC700 series
|
||||
*
|
||||
* Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This code is inspired by the perf support of various other architectures.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
*/
|
||||
// SPDX-License-Identifier: GPL-2.0+
|
||||
//
|
||||
// Linux performance counter support for ARC CPUs.
|
||||
// This code is inspired by the perf support of various other architectures.
|
||||
//
|
||||
// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com)
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/module.h>
|
||||
|
@ -19,12 +14,31 @@
|
|||
#include <asm/arcregs.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
/* HW holds 8 symbols + one for null terminator */
|
||||
#define ARCPMU_EVENT_NAME_LEN 9
|
||||
|
||||
enum arc_pmu_attr_groups {
|
||||
ARCPMU_ATTR_GR_EVENTS,
|
||||
ARCPMU_ATTR_GR_FORMATS,
|
||||
ARCPMU_NR_ATTR_GR
|
||||
};
|
||||
|
||||
struct arc_pmu_raw_event_entry {
|
||||
char name[ARCPMU_EVENT_NAME_LEN];
|
||||
};
|
||||
|
||||
struct arc_pmu {
|
||||
struct pmu pmu;
|
||||
unsigned int irq;
|
||||
int n_counters;
|
||||
int n_events;
|
||||
u64 max_period;
|
||||
int ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
|
||||
|
||||
struct arc_pmu_raw_event_entry *raw_entry;
|
||||
struct attribute **attrs;
|
||||
struct perf_pmu_events_attr *attr;
|
||||
const struct attribute_group *attr_groups[ARCPMU_NR_ATTR_GR + 1];
|
||||
};
|
||||
|
||||
struct arc_pmu_cpu {
|
||||
|
@ -49,6 +63,7 @@ static int callchain_trace(unsigned int addr, void *data)
|
|||
{
|
||||
struct arc_callchain_trace *ctrl = data;
|
||||
struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff;
|
||||
|
||||
perf_callchain_store(entry, addr);
|
||||
|
||||
if (ctrl->depth++ < 3)
|
||||
|
@ -57,8 +72,8 @@ static int callchain_trace(unsigned int addr, void *data)
|
|||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
|
||||
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct arc_callchain_trace ctrl = {
|
||||
.depth = 0,
|
||||
|
@ -68,8 +83,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
|
|||
arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
|
||||
}
|
||||
|
||||
void
|
||||
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
|
||||
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* User stack can't be unwound trivially with kernel dwarf unwinder
|
||||
|
@ -82,10 +97,10 @@ static struct arc_pmu *arc_pmu;
|
|||
static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);
|
||||
|
||||
/* read counter #idx; note that counter# != event# on ARC! */
|
||||
static uint64_t arc_pmu_read_counter(int idx)
|
||||
static u64 arc_pmu_read_counter(int idx)
|
||||
{
|
||||
uint32_t tmp;
|
||||
uint64_t result;
|
||||
u32 tmp;
|
||||
u64 result;
|
||||
|
||||
/*
|
||||
* ARC supports making 'snapshots' of the counters, so we don't
|
||||
|
@ -94,7 +109,7 @@ static uint64_t arc_pmu_read_counter(int idx)
|
|||
write_aux_reg(ARC_REG_PCT_INDEX, idx);
|
||||
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
|
||||
write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
|
||||
result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
|
||||
result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
|
||||
result |= read_aux_reg(ARC_REG_PCT_SNAPL);
|
||||
|
||||
return result;
|
||||
|
@ -103,9 +118,9 @@ static uint64_t arc_pmu_read_counter(int idx)
|
|||
static void arc_perf_event_update(struct perf_event *event,
|
||||
struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
uint64_t prev_raw_count = local64_read(&hwc->prev_count);
|
||||
uint64_t new_raw_count = arc_pmu_read_counter(idx);
|
||||
int64_t delta = new_raw_count - prev_raw_count;
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
u64 new_raw_count = arc_pmu_read_counter(idx);
|
||||
s64 delta = new_raw_count - prev_raw_count;
|
||||
|
||||
/*
|
||||
* We aren't afraid of hwc->prev_count changing beneath our feet
|
||||
|
@ -155,7 +170,7 @@ static int arc_pmu_event_init(struct perf_event *event)
|
|||
int ret;
|
||||
|
||||
if (!is_sampling_event(event)) {
|
||||
hwc->sample_period = arc_pmu->max_period;
|
||||
hwc->sample_period = arc_pmu->max_period;
|
||||
hwc->last_period = hwc->sample_period;
|
||||
local64_set(&hwc->period_left, hwc->sample_period);
|
||||
}
|
||||
|
@ -192,6 +207,18 @@ static int arc_pmu_event_init(struct perf_event *event)
|
|||
pr_debug("init cache event with h/w %08x \'%s\'\n",
|
||||
(int)hwc->config, arc_pmu_ev_hw_map[ret]);
|
||||
return 0;
|
||||
|
||||
case PERF_TYPE_RAW:
|
||||
if (event->attr.config >= arc_pmu->n_events)
|
||||
return -ENOENT;
|
||||
|
||||
hwc->config |= event->attr.config;
|
||||
pr_debug("init raw event with idx %lld \'%s\'\n",
|
||||
event->attr.config,
|
||||
arc_pmu->raw_entry[event->attr.config].name);
|
||||
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return -ENOENT;
|
||||
}
|
||||
|
@ -200,7 +227,7 @@ static int arc_pmu_event_init(struct perf_event *event)
|
|||
/* starts all counters */
|
||||
static void arc_pmu_enable(struct pmu *pmu)
|
||||
{
|
||||
uint32_t tmp;
|
||||
u32 tmp;
|
||||
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
|
||||
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
|
||||
}
|
||||
|
@ -208,7 +235,7 @@ static void arc_pmu_enable(struct pmu *pmu)
|
|||
/* stops all counters */
|
||||
static void arc_pmu_disable(struct pmu *pmu)
|
||||
{
|
||||
uint32_t tmp;
|
||||
u32 tmp;
|
||||
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
|
||||
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
|
||||
}
|
||||
|
@ -228,7 +255,7 @@ static int arc_pmu_event_set_period(struct perf_event *event)
|
|||
local64_set(&hwc->period_left, left);
|
||||
hwc->last_period = period;
|
||||
overflow = 1;
|
||||
} else if (unlikely(left <= 0)) {
|
||||
} else if (unlikely(left <= 0)) {
|
||||
/* left underflowed by less than period. */
|
||||
left += period;
|
||||
local64_set(&hwc->period_left, left);
|
||||
|
@ -246,8 +273,8 @@ static int arc_pmu_event_set_period(struct perf_event *event)
|
|||
write_aux_reg(ARC_REG_PCT_INDEX, idx);
|
||||
|
||||
/* Write value */
|
||||
write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value);
|
||||
write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32));
|
||||
write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value));
|
||||
write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value));
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
|
@ -277,7 +304,7 @@ static void arc_pmu_start(struct perf_event *event, int flags)
|
|||
/* Enable interrupt for this counter */
|
||||
if (is_sampling_event(event))
|
||||
write_aux_reg(ARC_REG_PCT_INT_CTRL,
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
|
||||
|
||||
/* enable ARC pmu here */
|
||||
write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */
|
||||
|
@ -295,9 +322,9 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
|
|||
* Reset interrupt flag by writing of 1. This is required
|
||||
* to make sure pending interrupt was not left.
|
||||
*/
|
||||
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
|
||||
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
|
||||
write_aux_reg(ARC_REG_PCT_INT_CTRL,
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx));
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx));
|
||||
}
|
||||
|
||||
if (!(event->hw.state & PERF_HES_STOPPED)) {
|
||||
|
@ -349,9 +376,10 @@ static int arc_pmu_add(struct perf_event *event, int flags)
|
|||
|
||||
if (is_sampling_event(event)) {
|
||||
/* Mimic full counter overflow as other arches do */
|
||||
write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period);
|
||||
write_aux_reg(ARC_REG_PCT_INT_CNTL,
|
||||
lower_32_bits(arc_pmu->max_period));
|
||||
write_aux_reg(ARC_REG_PCT_INT_CNTH,
|
||||
(arc_pmu->max_period >> 32));
|
||||
upper_32_bits(arc_pmu->max_period));
|
||||
}
|
||||
|
||||
write_aux_reg(ARC_REG_PCT_CONFIG, 0);
|
||||
|
@ -392,7 +420,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
|
|||
idx = __ffs(active_ints);
|
||||
|
||||
/* Reset interrupt flag by writing of 1 */
|
||||
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
|
||||
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
|
||||
|
||||
/*
|
||||
* On reset of "interrupt active" bit corresponding
|
||||
|
@ -400,7 +428,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
|
|||
* Now we need to re-enable interrupt for the counter.
|
||||
*/
|
||||
write_aux_reg(ARC_REG_PCT_INT_CTRL,
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
|
||||
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
|
||||
|
||||
event = pmu_cpu->act_counter[idx];
|
||||
hwc = &event->hw;
|
||||
|
@ -414,7 +442,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
|
|||
arc_pmu_stop(event, 0);
|
||||
}
|
||||
|
||||
active_ints &= ~(1U << idx);
|
||||
active_ints &= ~BIT(idx);
|
||||
} while (active_ints);
|
||||
|
||||
done:
|
||||
|
@ -441,19 +469,108 @@ static void arc_cpu_pmu_irq_init(void *data)
|
|||
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
|
||||
}
|
||||
|
||||
/* Event field occupies the bottom 15 bits of our config field */
|
||||
PMU_FORMAT_ATTR(event, "config:0-14");
|
||||
static struct attribute *arc_pmu_format_attrs[] = {
|
||||
&format_attr_event.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group arc_pmu_format_attr_gr = {
|
||||
.name = "format",
|
||||
.attrs = arc_pmu_format_attrs,
|
||||
};
|
||||
|
||||
static ssize_t arc_pmu_events_sysfs_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct perf_pmu_events_attr *pmu_attr;
|
||||
|
||||
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
|
||||
return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't add attrs here as we don't have pre-defined list of perf events.
|
||||
* We will generate and add attrs dynamically in probe() after we read HW
|
||||
* configuration.
|
||||
*/
|
||||
static struct attribute_group arc_pmu_events_attr_gr = {
|
||||
.name = "events",
|
||||
};
|
||||
|
||||
static void arc_pmu_add_raw_event_attr(int j, char *str)
|
||||
{
|
||||
memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1);
|
||||
arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name;
|
||||
arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444);
|
||||
arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show;
|
||||
arc_pmu->attr[j].id = j;
|
||||
arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr);
|
||||
}
|
||||
|
||||
static int arc_pmu_raw_alloc(struct device *dev)
|
||||
{
|
||||
arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
|
||||
sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO);
|
||||
if (!arc_pmu->attr)
|
||||
return -ENOMEM;
|
||||
|
||||
arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
|
||||
sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO);
|
||||
if (!arc_pmu->attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events,
|
||||
sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO);
|
||||
if (!arc_pmu->raw_entry)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool event_in_hw_event_map(int i, char *name)
|
||||
{
|
||||
if (!arc_pmu_ev_hw_map[i])
|
||||
return false;
|
||||
|
||||
if (!strlen(arc_pmu_ev_hw_map[i]))
|
||||
return false;
|
||||
|
||||
if (strcmp(arc_pmu_ev_hw_map[i], name))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void arc_pmu_map_hw_event(int j, char *str)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* See if HW condition has been mapped to a perf event_id */
|
||||
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
|
||||
if (event_in_hw_event_map(i, str)) {
|
||||
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
|
||||
i, str, j);
|
||||
arc_pmu->ev_hw_idx[i] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int arc_pmu_device_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct arc_reg_pct_build pct_bcr;
|
||||
struct arc_reg_cc_build cc_bcr;
|
||||
int i, j, has_interrupts;
|
||||
int i, has_interrupts;
|
||||
int counter_size; /* in bits */
|
||||
|
||||
union cc_name {
|
||||
struct {
|
||||
uint32_t word0, word1;
|
||||
u32 word0, word1;
|
||||
char sentinel;
|
||||
} indiv;
|
||||
char str[9];
|
||||
char str[ARCPMU_EVENT_NAME_LEN];
|
||||
} cc_name;
|
||||
|
||||
|
||||
|
@ -463,15 +580,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
return -ENODEV;
|
||||
}
|
||||
BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
|
||||
BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
|
||||
if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS))
|
||||
return -EINVAL;
|
||||
|
||||
READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
|
||||
BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
|
||||
if (WARN(!cc_bcr.v, "Counters exist but No countable conditions?"))
|
||||
return -EINVAL;
|
||||
|
||||
arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
|
||||
if (!arc_pmu)
|
||||
return -ENOMEM;
|
||||
|
||||
arc_pmu->n_events = cc_bcr.c;
|
||||
|
||||
if (arc_pmu_raw_alloc(&pdev->dev))
|
||||
return -ENOMEM;
|
||||
|
||||
has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;
|
||||
|
||||
arc_pmu->n_counters = pct_bcr.c;
|
||||
|
@ -481,30 +605,26 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
|
||||
pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
|
||||
arc_pmu->n_counters, counter_size, cc_bcr.c,
|
||||
has_interrupts ? ", [overflow IRQ support]":"");
|
||||
has_interrupts ? ", [overflow IRQ support]" : "");
|
||||
|
||||
cc_name.str[8] = 0;
|
||||
cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0;
|
||||
for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
|
||||
arc_pmu->ev_hw_idx[i] = -1;
|
||||
|
||||
/* loop thru all available h/w condition indexes */
|
||||
for (j = 0; j < cc_bcr.c; j++) {
|
||||
write_aux_reg(ARC_REG_CC_INDEX, j);
|
||||
for (i = 0; i < cc_bcr.c; i++) {
|
||||
write_aux_reg(ARC_REG_CC_INDEX, i);
|
||||
cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
|
||||
cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
|
||||
|
||||
/* See if it has been mapped to a perf event_id */
|
||||
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
|
||||
if (arc_pmu_ev_hw_map[i] &&
|
||||
!strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
|
||||
strlen(arc_pmu_ev_hw_map[i])) {
|
||||
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
|
||||
i, cc_name.str, j);
|
||||
arc_pmu->ev_hw_idx[i] = j;
|
||||
}
|
||||
}
|
||||
arc_pmu_map_hw_event(i, cc_name.str);
|
||||
arc_pmu_add_raw_event_attr(i, cc_name.str);
|
||||
}
|
||||
|
||||
arc_pmu_events_attr_gr.attrs = arc_pmu->attrs;
|
||||
arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr;
|
||||
arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr;
|
||||
|
||||
arc_pmu->pmu = (struct pmu) {
|
||||
.pmu_enable = arc_pmu_enable,
|
||||
.pmu_disable = arc_pmu_disable,
|
||||
|
@ -514,6 +634,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
.start = arc_pmu_start,
|
||||
.stop = arc_pmu_stop,
|
||||
.read = arc_pmu_read,
|
||||
.attr_groups = arc_pmu->attr_groups,
|
||||
};
|
||||
|
||||
if (has_interrupts) {
|
||||
|
@ -535,17 +656,19 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
} else
|
||||
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
|
||||
|
||||
return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
|
||||
/*
|
||||
* perf parser doesn't really like '-' symbol in events name, so let's
|
||||
* use '_' in arc pct name as it goes to kernel PMU event prefix.
|
||||
*/
|
||||
return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OF
|
||||
static const struct of_device_id arc_pmu_match[] = {
|
||||
{ .compatible = "snps,arc700-pct" },
|
||||
{ .compatible = "snps,archs-pct" },
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, arc_pmu_match);
|
||||
#endif
|
||||
|
||||
static struct platform_driver arc_pmu_driver = {
|
||||
.driver = {
|
||||
|
|
|
@ -123,6 +123,7 @@ static void read_arc_build_cfg_regs(void)
|
|||
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
|
||||
const struct id_to_str *tbl;
|
||||
struct bcr_isa_arcv2 isa;
|
||||
struct bcr_actionpoint ap;
|
||||
|
||||
FIX_PTR(cpu);
|
||||
|
||||
|
@ -195,6 +196,7 @@ static void read_arc_build_cfg_regs(void)
|
|||
cpu->bpu.full = bpu.ft;
|
||||
cpu->bpu.num_cache = 256 << bpu.bce;
|
||||
cpu->bpu.num_pred = 2048 << bpu.pte;
|
||||
cpu->bpu.ret_stk = 4 << bpu.rse;
|
||||
|
||||
if (cpu->core.family >= 0x54) {
|
||||
unsigned int exec_ctrl;
|
||||
|
@ -207,8 +209,11 @@ static void read_arc_build_cfg_regs(void)
|
|||
}
|
||||
}
|
||||
|
||||
READ_BCR(ARC_REG_AP_BCR, bcr);
|
||||
cpu->extn.ap = bcr.ver ? 1 : 0;
|
||||
READ_BCR(ARC_REG_AP_BCR, ap);
|
||||
if (ap.ver) {
|
||||
cpu->extn.ap_num = 2 << ap.num;
|
||||
cpu->extn.ap_full = !!ap.min;
|
||||
}
|
||||
|
||||
READ_BCR(ARC_REG_SMART_BCR, bcr);
|
||||
cpu->extn.smart = bcr.ver ? 1 : 0;
|
||||
|
@ -216,8 +221,6 @@ static void read_arc_build_cfg_regs(void)
|
|||
READ_BCR(ARC_REG_RTT_BCR, bcr);
|
||||
cpu->extn.rtt = bcr.ver ? 1 : 0;
|
||||
|
||||
cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
|
||||
|
||||
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
|
||||
|
||||
/* some hacks for lack of feature BCR info in old ARC700 cores */
|
||||
|
@ -299,10 +302,10 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
|
|||
|
||||
if (cpu->bpu.ver)
|
||||
n += scnprintf(buf + n, len - n,
|
||||
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
|
||||
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
|
||||
IS_AVAIL1(cpu->bpu.full, "full"),
|
||||
IS_AVAIL1(!cpu->bpu.full, "partial"),
|
||||
cpu->bpu.num_cache, cpu->bpu.num_pred);
|
||||
cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
|
||||
|
||||
if (is_isa_arcv2()) {
|
||||
struct bcr_lpb lpb;
|
||||
|
@ -336,11 +339,17 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
|
|||
IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
|
||||
IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
|
||||
|
||||
if (cpu->extn.debug)
|
||||
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
|
||||
IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
|
||||
if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
|
||||
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
|
||||
IS_AVAIL1(cpu->extn.smart, "smaRT "),
|
||||
IS_AVAIL1(cpu->extn.rtt, "RTT "));
|
||||
if (cpu->extn.ap_num) {
|
||||
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
|
||||
cpu->extn.ap_num,
|
||||
cpu->extn.ap_full ? "full":"min");
|
||||
}
|
||||
n += scnprintf(buf + n, len - n, "\n");
|
||||
}
|
||||
|
||||
if (cpu->dccm.sz || cpu->iccm.sz)
|
||||
n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include <asm/arcregs.h>
|
||||
#include <asm/irqflags.h>
|
||||
|
||||
#define ARC_PATH_MAX 256
|
||||
|
||||
/*
|
||||
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
|
||||
* -Prints 3 regs per line and a CR.
|
||||
|
@ -58,11 +60,12 @@ static void show_callee_regs(struct callee_regs *cregs)
|
|||
print_reg_file(&(cregs->r13), 13);
|
||||
}
|
||||
|
||||
static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
|
||||
static void print_task_path_n_nm(struct task_struct *tsk)
|
||||
{
|
||||
char *path_nm = NULL;
|
||||
struct mm_struct *mm;
|
||||
struct file *exe_file;
|
||||
char buf[ARC_PATH_MAX];
|
||||
|
||||
mm = get_task_mm(tsk);
|
||||
if (!mm)
|
||||
|
@ -72,7 +75,7 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
|
|||
mmput(mm);
|
||||
|
||||
if (exe_file) {
|
||||
path_nm = file_path(exe_file, buf, 255);
|
||||
path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1);
|
||||
fput(exe_file);
|
||||
}
|
||||
|
||||
|
@ -80,10 +83,9 @@ done:
|
|||
pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
|
||||
}
|
||||
|
||||
static void show_faulting_vma(unsigned long address, char *buf)
|
||||
static void show_faulting_vma(unsigned long address)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
char *nm = buf;
|
||||
struct mm_struct *active_mm = current->active_mm;
|
||||
|
||||
/* can't use print_vma_addr() yet as it doesn't check for
|
||||
|
@ -96,8 +98,11 @@ static void show_faulting_vma(unsigned long address, char *buf)
|
|||
* if the container VMA is not found
|
||||
*/
|
||||
if (vma && (vma->vm_start <= address)) {
|
||||
char buf[ARC_PATH_MAX];
|
||||
char *nm = "?";
|
||||
|
||||
if (vma->vm_file) {
|
||||
nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
|
||||
nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
|
||||
if (IS_ERR(nm))
|
||||
nm = "?";
|
||||
}
|
||||
|
@ -173,13 +178,14 @@ void show_regs(struct pt_regs *regs)
|
|||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct callee_regs *cregs;
|
||||
char *buf;
|
||||
|
||||
buf = (char *)__get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
return;
|
||||
/*
|
||||
* generic code calls us with preemption disabled, but some calls
|
||||
* here could sleep, so re-enable to avoid lockdep splat
|
||||
*/
|
||||
preempt_enable();
|
||||
|
||||
print_task_path_n_nm(tsk, buf);
|
||||
print_task_path_n_nm(tsk);
|
||||
show_regs_print_info(KERN_INFO);
|
||||
|
||||
show_ecr_verbose(regs);
|
||||
|
@ -189,7 +195,7 @@ void show_regs(struct pt_regs *regs)
|
|||
(void *)regs->blink, (void *)regs->ret);
|
||||
|
||||
if (user_mode(regs))
|
||||
show_faulting_vma(regs->ret, buf); /* faulting code, not data */
|
||||
show_faulting_vma(regs->ret); /* faulting code, not data */
|
||||
|
||||
pr_info("[STAT32]: 0x%08lx", regs->status32);
|
||||
|
||||
|
@ -222,7 +228,7 @@ void show_regs(struct pt_regs *regs)
|
|||
if (cregs)
|
||||
show_callee_regs(cregs);
|
||||
|
||||
free_page((unsigned long)buf);
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
|
||||
|
|
|
@ -7,11 +7,39 @@
|
|||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cache.h>
|
||||
|
||||
#undef PREALLOC_NOT_AVAIL
|
||||
/*
|
||||
* The memset implementation below is optimized to use prefetchw and prealloc
|
||||
* instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
|
||||
* If you want to implement optimized memset for other possible L1 data cache
|
||||
* line lengths (32B and 128B) you should rewrite code carefully checking
|
||||
* we don't call any prefetchw/prealloc instruction for L1 cache lines which
|
||||
* don't belongs to memset area.
|
||||
*/
|
||||
|
||||
#if L1_CACHE_SHIFT == 6
|
||||
|
||||
.macro PREALLOC_INSTR reg, off
|
||||
prealloc [\reg, \off]
|
||||
.endm
|
||||
|
||||
.macro PREFETCHW_INSTR reg, off
|
||||
prefetchw [\reg, \off]
|
||||
.endm
|
||||
|
||||
#else
|
||||
|
||||
.macro PREALLOC_INSTR
|
||||
.endm
|
||||
|
||||
.macro PREFETCHW_INSTR
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
ENTRY_CFI(memset)
|
||||
prefetchw [r0] ; Prefetch the write location
|
||||
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
|
||||
mov.f 0, r2
|
||||
;;; if size is zero
|
||||
jz.d [blink]
|
||||
|
@ -48,11 +76,8 @@ ENTRY_CFI(memset)
|
|||
|
||||
lpnz @.Lset64bytes
|
||||
;; LOOP START
|
||||
#ifdef PREALLOC_NOT_AVAIL
|
||||
prefetchw [r3, 64] ;Prefetch the next write location
|
||||
#else
|
||||
prealloc [r3, 64]
|
||||
#endif
|
||||
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_LL64
|
||||
std.ab r4, [r3, 8]
|
||||
std.ab r4, [r3, 8]
|
||||
|
@ -85,7 +110,6 @@ ENTRY_CFI(memset)
|
|||
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
|
||||
lpnz .Lset32bytes
|
||||
;; LOOP START
|
||||
prefetchw [r3, 32] ;Prefetch the next write location
|
||||
#ifdef CONFIG_ARC_HAS_LL64
|
||||
std.ab r4, [r3, 8]
|
||||
std.ab r4, [r3, 8]
|
||||
|
|
|
@ -141,12 +141,17 @@ good_area:
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
|
||||
if (fatal_signal_pending(current)) {
|
||||
if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
|
||||
up_read(&mm->mmap_sem);
|
||||
if (user_mode(regs))
|
||||
|
||||
/*
|
||||
* if fault retry, mmap_sem already relinquished by core mm
|
||||
* so OK to return to user mode (with signal handled first)
|
||||
*/
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
|
|
@@ -119,7 +119,8 @@ void __init setup_arch_memory(void)
*/

memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
__pa(_end) - CONFIG_LINUX_LINK_BASE);

#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size) {
@@ -706,6 +706,7 @@
fsl,tmr-fiper1 = <999999995>;
fsl,tmr-fiper2 = <99990>;
fsl,max-adj = <499999999>;
fsl,extts-fifo;
};

enet0: ethernet@2d10000 {

@ -1 +1,95 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
|
||||
#define _ASM_ARM_XEN_PAGE_COHERENT_H
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <asm/page.h>
|
||||
#include <xen/arm/page-coherent.h>
|
||||
|
||||
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
|
||||
{
|
||||
if (dev && dev->archdata.dev_dma_ops)
|
||||
return dev->archdata.dev_dma_ops;
|
||||
return get_arch_dma_ops(NULL);
|
||||
}
|
||||
|
||||
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
|
||||
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
|
||||
{
|
||||
return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
|
||||
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
|
||||
{
|
||||
xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
|
||||
dma_addr_t dev_addr, unsigned long offset, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long page_pfn = page_to_xen_pfn(page);
|
||||
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
|
||||
unsigned long compound_pages =
|
||||
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
|
||||
bool local = (page_pfn <= dev_pfn) &&
|
||||
(dev_pfn - page_pfn < compound_pages);
|
||||
|
||||
/*
|
||||
* Dom0 is mapped 1:1, while the Linux page can span across
|
||||
* multiple Xen pages, it's not possible for it to contain a
|
||||
* mix of local and foreign Xen pages. So if the first xen_pfn
|
||||
* == mfn the page is local otherwise it's a foreign page
|
||||
* grant-mapped in dom0. If the page is local we can safely
|
||||
* call the native dma_ops function, otherwise we call the xen
|
||||
* specific function.
|
||||
*/
|
||||
if (local)
|
||||
xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
|
||||
else
|
||||
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
/*
|
||||
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
|
||||
* multiple Xen page, it's not possible to have a mix of local and
|
||||
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
|
||||
* foreign mfn will always return false. If the page is local we can
|
||||
* safely call the native dma_ops function, otherwise we call the xen
|
||||
* specific function.
|
||||
*/
|
||||
if (pfn_valid(pfn)) {
|
||||
if (xen_get_dma_ops(hwdev)->unmap_page)
|
||||
xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
|
||||
} else
|
||||
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
|
||||
dma_addr_t handle, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
if (pfn_valid(pfn)) {
|
||||
if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
|
||||
xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
|
||||
} else
|
||||
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
|
||||
}
|
||||
|
||||
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
|
||||
dma_addr_t handle, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
if (pfn_valid(pfn)) {
|
||||
if (xen_get_dma_ops(hwdev)->sync_single_for_device)
|
||||
xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
|
||||
} else
|
||||
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
|
||||
}
|
||||
|
||||
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
|
||||
|
|
|
@ -1083,12 +1083,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
|
|||
|
||||
/* Arithmatic Operation */
|
||||
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
|
||||
const u8 rn, struct jit_ctx *ctx, u8 op) {
|
||||
const u8 rn, struct jit_ctx *ctx, u8 op,
|
||||
bool is_jmp64) {
|
||||
switch (op) {
|
||||
case BPF_JSET:
|
||||
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
|
||||
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
|
||||
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
|
||||
if (is_jmp64) {
|
||||
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
|
||||
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
|
||||
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
|
||||
} else {
|
||||
emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_JEQ:
|
||||
case BPF_JNE:
|
||||
|
@ -1096,18 +1101,25 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
|
|||
case BPF_JGE:
|
||||
case BPF_JLE:
|
||||
case BPF_JLT:
|
||||
emit(ARM_CMP_R(rd, rm), ctx);
|
||||
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
|
||||
if (is_jmp64) {
|
||||
emit(ARM_CMP_R(rd, rm), ctx);
|
||||
/* Only compare low halve if high halve are equal. */
|
||||
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
|
||||
} else {
|
||||
emit(ARM_CMP_R(rt, rn), ctx);
|
||||
}
|
||||
break;
|
||||
case BPF_JSLE:
|
||||
case BPF_JSGT:
|
||||
emit(ARM_CMP_R(rn, rt), ctx);
|
||||
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
|
||||
if (is_jmp64)
|
||||
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
|
||||
break;
|
||||
case BPF_JSLT:
|
||||
case BPF_JSGE:
|
||||
emit(ARM_CMP_R(rt, rn), ctx);
|
||||
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
|
||||
if (is_jmp64)
|
||||
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1615,6 +1627,17 @@ exit:
|
|||
case BPF_JMP | BPF_JLT | BPF_X:
|
||||
case BPF_JMP | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP | BPF_JSLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSET | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_X:
|
||||
/* Setup source registers */
|
||||
rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
|
||||
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
|
||||
|
@ -1641,6 +1664,17 @@ exit:
|
|||
case BPF_JMP | BPF_JLE | BPF_K:
|
||||
case BPF_JMP | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP | BPF_JSLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSET | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_K:
|
||||
if (off == 0)
|
||||
break;
|
||||
rm = tmp2[0];
|
||||
|
@ -1652,7 +1686,8 @@ go_jmp:
|
|||
rd = arm_bpf_get_reg64(dst, tmp, ctx);
|
||||
|
||||
/* Check for the condition */
|
||||
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code));
|
||||
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
|
||||
BPF_CLASS(code) == BPF_JMP);
|
||||
|
||||
/* Setup JUMP instruction */
|
||||
jmp_offset = bpf2a32_offset(i+off, i, ctx);
|
||||
|
|
|
@@ -62,6 +62,7 @@
#define ARM_INST_ADDS_I 0x02900000

#define ARM_INST_AND_R 0x00000000
#define ARM_INST_ANDS_R 0x00100000
#define ARM_INST_AND_I 0x02000000

#define ARM_INST_BIC_R 0x01c00000
@@ -172,6 +173,7 @@
#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)

#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm)
#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)

#define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm)

@@ -60,8 +60,6 @@

#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
#else
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef __ASSEMBLY__

@@ -20,9 +20,6 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
};

struct pdev_archdata {

@@ -60,8 +60,11 @@ static inline bool arm64_kernel_use_ng_mappings(void)
* later determine that kpti is required, then
* kpti_install_ng_mappings() will make them non-global.
*/
if (arm64_kernel_unmapped_at_el0())
return true;

if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return arm64_kernel_unmapped_at_el0();
return false;

/*
* KASLR is enabled so we're going to be enabling kpti on non-broken
@ -1 +1,77 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
|
||||
#define _ASM_ARM64_XEN_PAGE_COHERENT_H
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <asm/page.h>
|
||||
#include <xen/arm/page-coherent.h>
|
||||
|
||||
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
|
||||
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
|
||||
{
|
||||
return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
|
||||
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
|
||||
{
|
||||
dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
|
||||
dma_addr_t handle, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
|
||||
if (pfn_valid(pfn))
|
||||
dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
|
||||
else
|
||||
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
|
||||
}
|
||||
|
||||
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
|
||||
dma_addr_t handle, size_t size, enum dma_data_direction dir)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
if (pfn_valid(pfn))
|
||||
dma_direct_sync_single_for_device(hwdev, handle, size, dir);
|
||||
else
|
||||
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
|
||||
}
|
||||
|
||||
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
|
||||
dma_addr_t dev_addr, unsigned long offset, size_t size,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long page_pfn = page_to_xen_pfn(page);
|
||||
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
|
||||
unsigned long compound_pages =
|
||||
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
|
||||
bool local = (page_pfn <= dev_pfn) &&
|
||||
(dev_pfn - page_pfn < compound_pages);
|
||||
|
||||
if (local)
|
||||
dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
|
||||
else
|
||||
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
|
||||
}
|
||||
|
||||
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
|
||||
size_t size, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long pfn = PFN_DOWN(handle);
|
||||
/*
|
||||
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
|
||||
* multiple Xen page, it's not possible to have a mix of local and
|
||||
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
|
||||
* foreign mfn will always return false. If the page is local we can
|
||||
* safely call the native dma_ops function, otherwise we call the xen
|
||||
* specific function.
|
||||
*/
|
||||
if (pfn_valid(pfn))
|
||||
dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
|
||||
else
|
||||
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
|
||||
}
|
||||
|
||||
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/kernel-pgtable.h>
|
||||
#include <asm/memory.h>
|
||||
|
@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static __init const u8 *get_cmdline(void *fdt)
|
||||
static __init const u8 *kaslr_get_cmdline(void *fdt)
|
||||
{
|
||||
static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
|
||||
|
||||
|
@ -109,7 +110,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
|
|||
* Check if 'nokaslr' appears on the command line, and
|
||||
* return 0 if that is the case.
|
||||
*/
|
||||
cmdline = get_cmdline(fdt);
|
||||
cmdline = kaslr_get_cmdline(fdt);
|
||||
str = strstr(cmdline, "nokaslr");
|
||||
if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
|
||||
return 0;
|
||||
|
@ -169,5 +170,8 @@ u64 __init kaslr_early_init(u64 dt_phys)
|
|||
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
|
||||
module_alloc_base &= PAGE_MASK;
|
||||
|
||||
__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
|
||||
__flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
|
|
@@ -466,9 +466,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
__iommu_setup_dma_ops(dev, dma_base, size, iommu);

#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
if (xen_initial_domain())
dev->dma_ops = xen_dma_ops;
}
#endif
}

@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
|
|||
const s16 off = insn->off;
|
||||
const s32 imm = insn->imm;
|
||||
const int i = insn - ctx->prog->insnsi;
|
||||
const bool is64 = BPF_CLASS(code) == BPF_ALU64;
|
||||
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
|
||||
BPF_CLASS(code) == BPF_JMP;
|
||||
const bool isdw = BPF_SIZE(code) == BPF_DW;
|
||||
u8 jmp_cond;
|
||||
s32 jmp_offset;
|
||||
|
@ -559,7 +560,17 @@ emit_bswap_uxt:
|
|||
case BPF_JMP | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP | BPF_JSLE | BPF_X:
|
||||
emit(A64_CMP(1, dst, src), ctx);
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_X:
|
||||
emit(A64_CMP(is64, dst, src), ctx);
|
||||
emit_cond_jmp:
|
||||
jmp_offset = bpf2a64_offset(i + off, i, ctx);
|
||||
check_imm19(jmp_offset);
|
||||
|
@ -601,7 +612,8 @@ emit_cond_jmp:
|
|||
emit(A64_B_(jmp_cond, jmp_offset), ctx);
|
||||
break;
|
||||
case BPF_JMP | BPF_JSET | BPF_X:
|
||||
emit(A64_TST(1, dst, src), ctx);
|
||||
case BPF_JMP32 | BPF_JSET | BPF_X:
|
||||
emit(A64_TST(is64, dst, src), ctx);
|
||||
goto emit_cond_jmp;
|
||||
/* IF (dst COND imm) JUMP off */
|
||||
case BPF_JMP | BPF_JEQ | BPF_K:
|
||||
|
@ -614,12 +626,23 @@ emit_cond_jmp:
|
|||
case BPF_JMP | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP | BPF_JSLE | BPF_K:
|
||||
emit_a64_mov_i(1, tmp, imm, ctx);
|
||||
emit(A64_CMP(1, dst, tmp), ctx);
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_K:
|
||||
emit_a64_mov_i(is64, tmp, imm, ctx);
|
||||
emit(A64_CMP(is64, dst, tmp), ctx);
|
||||
goto emit_cond_jmp;
|
||||
case BPF_JMP | BPF_JSET | BPF_K:
|
||||
emit_a64_mov_i(1, tmp, imm, ctx);
|
||||
emit(A64_TST(1, dst, tmp), ctx);
|
||||
case BPF_JMP32 | BPF_JSET | BPF_K:
|
||||
emit_a64_mov_i(is64, tmp, imm, ctx);
|
||||
emit(A64_TST(is64, dst, tmp), ctx);
|
||||
goto emit_cond_jmp;
|
||||
/* function call */
|
||||
case BPF_JMP | BPF_CALL:
|
||||
|
|
|
@@ -37,8 +37,6 @@ libs-y += arch/$(ARCH)/lib/

boot := arch/h8300/boot

archmrproper:

archclean:
$(Q)$(MAKE) $(clean)=$(boot)

@@ -16,8 +16,6 @@ KBUILD_DEFCONFIG := generic_defconfig
NM := $(CROSS_COMPILE)nm -B
READELF := $(CROSS_COMPILE)readelf

export AWK

CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__

OBJCOPYFLAGS := --strip-all

@@ -3155,6 +3155,7 @@ config MIPS32_O32
config MIPS32_N32
bool "Kernel support for n32 binaries"
depends on 64BIT
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select COMPAT
select MIPS32_COMPAT
select SYSVIPC_COMPAT if SYSVIPC

@ -173,6 +173,31 @@ void __init plat_mem_setup(void)
|
|||
pm_power_off = bcm47xx_machine_halt;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCM47XX_BCMA
|
||||
static struct device * __init bcm47xx_setup_device(void)
|
||||
{
|
||||
struct device *dev;
|
||||
int err;
|
||||
|
||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
return NULL;
|
||||
|
||||
err = dev_set_name(dev, "bcm47xx_soc");
|
||||
if (err) {
|
||||
pr_err("Failed to set SoC device name: %d\n", err);
|
||||
kfree(dev);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
|
||||
if (err)
|
||||
pr_err("Failed to set SoC DMA mask: %d\n", err);
|
||||
|
||||
return dev;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This finishes bus initialization doing things that were not possible without
|
||||
* kmalloc. Make sure to call it late enough (after mm_init).
|
||||
|
@ -183,6 +208,10 @@ void __init bcm47xx_bus_setup(void)
|
|||
if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) {
|
||||
int err;
|
||||
|
||||
bcm47xx_bus.bcma.dev = bcm47xx_setup_device();
|
||||
if (!bcm47xx_bus.bcma.dev)
|
||||
panic("Failed to setup SoC device\n");
|
||||
|
||||
err = bcma_host_soc_init(&bcm47xx_bus.bcma);
|
||||
if (err)
|
||||
panic("Failed to initialize BCMA bus (err %d)", err);
|
||||
|
@ -235,6 +264,8 @@ static int __init bcm47xx_register_bus_complete(void)
|
|||
#endif
|
||||
#ifdef CONFIG_BCM47XX_BCMA
|
||||
case BCM47XX_BUS_TYPE_BCMA:
|
||||
if (device_register(bcm47xx_bus.bcma.dev))
|
||||
pr_err("Failed to register SoC device\n");
|
||||
bcma_bus_register(&bcm47xx_bus.bcma.bus);
|
||||
break;
|
||||
#endif
|
||||
|
|
|
@@ -98,7 +98,7 @@ static void octeon_kexec_smp_down(void *ignored)
" sync \n"
" synci ($0) \n");

relocated_kexec_smp_wait(NULL);
kexec_reboot();
}
#endif

@@ -66,6 +66,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_NR_UARTS=1
CONFIG_SERIAL_8250_RUNTIME_UARTS=1
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AR933X=y
CONFIG_SERIAL_AR933X_CONSOLE=y
# CONFIG_HW_RANDOM is not set

@@ -18,8 +18,6 @@
#define INT_NUM_EXTRA_START (INT_NUM_IM4_IRL0 + 32)
#define INT_NUM_IM_OFFSET (INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)

#define MIPS_CPU_TIMER_IRQ 7

#define MAX_IM 5

#endif /* _FALCON_IRQ__ */

@@ -19,8 +19,6 @@

#define LTQ_DMA_CH0_INT (INT_NUM_IM2_IRL0)

#define MIPS_CPU_TIMER_IRQ 7

#define MAX_IM 5

#endif

@ -74,14 +74,15 @@ static int __init vdma_init(void)
|
|||
get_order(VDMA_PGTBL_SIZE));
|
||||
BUG_ON(!pgtbl);
|
||||
dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE);
|
||||
pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl);
|
||||
pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl);
|
||||
|
||||
/*
|
||||
* Clear the R4030 translation table
|
||||
*/
|
||||
vdma_pgtbl_init();
|
||||
|
||||
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl));
|
||||
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE,
|
||||
CPHYSADDR((unsigned long)pgtbl));
|
||||
r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE);
|
||||
r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0);
|
||||
|
||||
|
|
|
@ -224,9 +224,11 @@ static struct irq_chip ltq_eiu_type = {
|
|||
.irq_set_type = ltq_eiu_settype,
|
||||
};
|
||||
|
||||
static void ltq_hw_irqdispatch(int module)
|
||||
static void ltq_hw_irq_handler(struct irq_desc *desc)
|
||||
{
|
||||
int module = irq_desc_get_irq(desc) - 2;
|
||||
u32 irq;
|
||||
int hwirq;
|
||||
|
||||
irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR);
|
||||
if (irq == 0)
|
||||
|
@ -237,7 +239,8 @@ static void ltq_hw_irqdispatch(int module)
|
|||
* other bits might be bogus
|
||||
*/
|
||||
irq = __fls(irq);
|
||||
do_IRQ((int)irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module));
|
||||
hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module);
|
||||
generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq));
|
||||
|
||||
/* if this is a EBU irq, we need to ack it or get a deadlock */
|
||||
if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT)
|
||||
|
@ -245,49 +248,6 @@ static void ltq_hw_irqdispatch(int module)
|
|||
LTQ_EBU_PCC_ISTAT);
|
||||
}
|
||||
|
||||
#define DEFINE_HWx_IRQDISPATCH(x) \
|
||||
static void ltq_hw ## x ## _irqdispatch(void) \
|
||||
{ \
|
||||
ltq_hw_irqdispatch(x); \
|
||||
}
|
||||
DEFINE_HWx_IRQDISPATCH(0)
|
||||
DEFINE_HWx_IRQDISPATCH(1)
|
||||
DEFINE_HWx_IRQDISPATCH(2)
|
||||
DEFINE_HWx_IRQDISPATCH(3)
|
||||
DEFINE_HWx_IRQDISPATCH(4)
|
||||
|
||||
#if MIPS_CPU_TIMER_IRQ == 7
|
||||
static void ltq_hw5_irqdispatch(void)
|
||||
{
|
||||
do_IRQ(MIPS_CPU_TIMER_IRQ);
|
||||
}
|
||||
#else
|
||||
DEFINE_HWx_IRQDISPATCH(5)
|
||||
#endif
|
||||
|
||||
static void ltq_hw_irq_handler(struct irq_desc *desc)
|
||||
{
|
||||
ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
|
||||
}
|
||||
|
||||
asmlinkage void plat_irq_dispatch(void)
|
||||
{
|
||||
unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
|
||||
int irq;
|
||||
|
||||
if (!pending) {
|
||||
spurious_interrupt();
|
||||
return;
|
||||
}
|
||||
|
||||
pending >>= CAUSEB_IP;
|
||||
while (pending) {
|
||||
irq = fls(pending) - 1;
|
||||
do_IRQ(MIPS_CPU_IRQ_BASE + irq);
|
||||
pending &= ~BIT(irq);
|
||||
}
|
||||
}
|
||||
|
||||
static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
|
||||
{
|
||||
struct irq_chip *chip = <q_irq_type;
|
||||
|
@ -343,38 +303,13 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
|
|||
for (i = 0; i < MAX_IM; i++)
|
||||
irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
|
||||
|
||||
if (cpu_has_vint) {
|
||||
pr_info("Setting up vectored interrupts\n");
|
||||
set_vi_handler(2, ltq_hw0_irqdispatch);
|
||||
set_vi_handler(3, ltq_hw1_irqdispatch);
|
||||
set_vi_handler(4, ltq_hw2_irqdispatch);
|
||||
set_vi_handler(5, ltq_hw3_irqdispatch);
|
||||
set_vi_handler(6, ltq_hw4_irqdispatch);
|
||||
set_vi_handler(7, ltq_hw5_irqdispatch);
|
||||
}
|
||||
|
||||
ltq_domain = irq_domain_add_linear(node,
|
||||
(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
|
||||
&irq_domain_ops, 0);
|
||||
|
||||
#ifndef CONFIG_MIPS_MT_SMP
|
||||
set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
|
||||
IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
|
||||
#else
|
||||
set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
|
||||
IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
|
||||
#endif
|
||||
|
||||
/* tell oprofile which irq to use */
|
||||
ltq_perfcount_irq = irq_create_mapping(ltq_domain, LTQ_PERF_IRQ);
|
||||
|
||||
/*
|
||||
* if the timer irq is not one of the mips irqs we need to
|
||||
* create a mapping
|
||||
*/
|
||||
if (MIPS_CPU_TIMER_IRQ != 7)
|
||||
irq_create_mapping(ltq_domain, MIPS_CPU_TIMER_IRQ);
|
||||
|
||||
/* the external interrupts are optional and xway only */
|
||||
eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway");
|
||||
if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) {
|
||||
|
@ -411,7 +346,7 @@ EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
|
|||
|
||||
unsigned int get_c0_compare_int(void)
|
||||
{
|
||||
return MIPS_CPU_TIMER_IRQ;
|
||||
return CP0_LEGACY_COMPARE_IRQ;
|
||||
}
|
||||
|
||||
static struct of_device_id __initdata of_irq_ids[] = {
|
||||
|
|
|
@@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void)
int irq;
struct irq_chip *msi;

if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) {
return 0;
} else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0;
msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1;
msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2;

@@ -3,9 +3,6 @@ OBJCOPYFLAGS := -O binary -R .note -R .note.gnu.build-id -R .comment -S

KBUILD_DEFCONFIG := defconfig

comma = ,


ifdef CONFIG_FUNCTION_TRACER
arch-y += -malways-save-lp -mno-relax
endif

@@ -54,8 +51,6 @@ endif
boot := arch/nds32/boot
core-y += $(boot)/dts/

.PHONY: FORCE

Image: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@

@@ -68,9 +63,6 @@ prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h

CLEAN_FILES += include/asm-nds32/constants.h*

# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)

@@ -20,7 +20,6 @@
KBUILD_DEFCONFIG := or1ksim_defconfig

OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)

KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__

@@ -50,5 +49,3 @@ else
BUILTIN_DTB := n
endif
core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/

all: vmlinux

@@ -337,6 +337,7 @@
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
#define PPC_INST_RLWINM_DOT 0x54000001
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004

@@ -47,6 +47,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
};
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */

@@ -852,11 +852,12 @@ start_here:

/* set up the PTE pointers for the Abatron bdiGDB.
*/
tovirt(r6,r6)
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
stw r6, 0(r5)

/* Now turn on the MMU for real! */

@@ -755,11 +755,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
}
} else
#endif
/* Fall through, for non-TM restore */
if (!MSR_TM_ACTIVE(msr)) {
{
/*
* Fall through, for non-TM restore
*
* Unset MSR[TS] on the thread regs since MSR from user
* context does not have MSR active, and recheckpoint was
* not called since restore_tm_sigcontexts() was not called

@@ -967,13 +967,6 @@ out:
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
unsigned long __init arch_syscall_addr(int nr)
{
return sys_call_table[nr*2];
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */

#ifdef PPC64_ELF_ABI_v1
char *arch_ftrace_match_adjust(char *str, const char *search)
{

@@ -165,6 +165,10 @@
#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \
___PPC_RA(d) | ___PPC_RS(a) | \
__PPC_SH(i) | __PPC_MB(mb) | \
__PPC_ME(me))
#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))

@ -768,36 +768,58 @@ emit_clear:
|
|||
case BPF_JMP | BPF_JGT | BPF_X:
|
||||
case BPF_JMP | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP | BPF_JSGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_X:
|
||||
true_cond = COND_GT;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JLT | BPF_K:
|
||||
case BPF_JMP | BPF_JLT | BPF_X:
|
||||
case BPF_JMP | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_X:
|
||||
true_cond = COND_LT;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JGE | BPF_K:
|
||||
case BPF_JMP | BPF_JGE | BPF_X:
|
||||
case BPF_JMP | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_X:
|
||||
true_cond = COND_GE;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JLE | BPF_K:
|
||||
case BPF_JMP | BPF_JLE | BPF_X:
|
||||
case BPF_JMP | BPF_JSLE | BPF_K:
|
||||
case BPF_JMP | BPF_JSLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_X:
|
||||
true_cond = COND_LE;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_X:
|
||||
true_cond = COND_EQ;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JNE | BPF_K:
|
||||
case BPF_JMP | BPF_JNE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_X:
|
||||
true_cond = COND_NE;
|
||||
goto cond_branch;
|
||||
case BPF_JMP | BPF_JSET | BPF_K:
|
||||
case BPF_JMP | BPF_JSET | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSET | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSET | BPF_X:
|
||||
true_cond = COND_NE;
|
||||
/* Fall through */
|
||||
|
||||
|
@ -809,18 +831,44 @@ cond_branch:
|
|||
case BPF_JMP | BPF_JLE | BPF_X:
|
||||
case BPF_JMP | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP | BPF_JNE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_X:
|
||||
/* unsigned comparison */
|
||||
PPC_CMPLD(dst_reg, src_reg);
|
||||
if (BPF_CLASS(code) == BPF_JMP32)
|
||||
PPC_CMPLW(dst_reg, src_reg);
|
||||
else
|
||||
PPC_CMPLD(dst_reg, src_reg);
|
||||
break;
|
||||
case BPF_JMP | BPF_JSGT | BPF_X:
|
||||
case BPF_JMP | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP | BPF_JSLE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_X:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_X:
|
||||
/* signed comparison */
|
||||
PPC_CMPD(dst_reg, src_reg);
|
||||
if (BPF_CLASS(code) == BPF_JMP32)
|
||||
PPC_CMPW(dst_reg, src_reg);
|
||||
else
|
||||
PPC_CMPD(dst_reg, src_reg);
|
||||
break;
|
||||
case BPF_JMP | BPF_JSET | BPF_X:
|
||||
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
|
||||
case BPF_JMP32 | BPF_JSET | BPF_X:
|
||||
if (BPF_CLASS(code) == BPF_JMP) {
|
||||
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
|
||||
src_reg);
|
||||
} else {
|
||||
int tmp_reg = b2p[TMP_REG_1];
|
||||
|
||||
PPC_AND(tmp_reg, dst_reg, src_reg);
|
||||
PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
|
||||
31);
|
||||
}
|
||||
break;
|
||||
case BPF_JMP | BPF_JNE | BPF_K:
|
||||
case BPF_JMP | BPF_JEQ | BPF_K:
|
||||
|
@ -828,43 +876,87 @@ cond_branch:
|
|||
case BPF_JMP | BPF_JLT | BPF_K:
|
||||
case BPF_JMP | BPF_JGE | BPF_K:
|
||||
case BPF_JMP | BPF_JLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JNE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JLE | BPF_K:
|
||||
{
|
||||
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
|
||||
|
||||
/*
|
||||
* Need sign-extended load, so only positive
|
||||
* values can be used as imm in cmpldi
|
||||
*/
|
||||
if (imm >= 0 && imm < 32768)
|
||||
PPC_CMPLDI(dst_reg, imm);
|
||||
else {
|
||||
if (imm >= 0 && imm < 32768) {
|
||||
if (is_jmp32)
|
||||
PPC_CMPLWI(dst_reg, imm);
|
||||
else
|
||||
PPC_CMPLDI(dst_reg, imm);
|
||||
} else {
|
||||
/* sign-extending load */
|
||||
PPC_LI32(b2p[TMP_REG_1], imm);
|
||||
/* ... but unsigned comparison */
|
||||
PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
|
||||
if (is_jmp32)
|
||||
PPC_CMPLW(dst_reg,
|
||||
b2p[TMP_REG_1]);
|
||||
else
|
||||
PPC_CMPLD(dst_reg,
|
||||
b2p[TMP_REG_1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BPF_JMP | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP | BPF_JSLE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_K:
|
||||
{
|
||||
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
|
||||
|
||||
/*
|
||||
* signed comparison, so any 16-bit value
|
||||
* can be used in cmpdi
|
||||
*/
|
||||
if (imm >= -32768 && imm < 32768)
|
||||
PPC_CMPDI(dst_reg, imm);
|
||||
else {
|
||||
if (imm >= -32768 && imm < 32768) {
|
||||
if (is_jmp32)
|
||||
PPC_CMPWI(dst_reg, imm);
|
||||
else
|
||||
PPC_CMPDI(dst_reg, imm);
|
||||
} else {
|
||||
PPC_LI32(b2p[TMP_REG_1], imm);
|
||||
PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
|
||||
if (is_jmp32)
|
||||
PPC_CMPW(dst_reg,
|
||||
b2p[TMP_REG_1]);
|
||||
else
|
||||
PPC_CMPD(dst_reg,
|
||||
b2p[TMP_REG_1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BPF_JMP | BPF_JSET | BPF_K:
|
||||
case BPF_JMP32 | BPF_JSET | BPF_K:
|
||||
/* andi does not sign-extend the immediate */
|
||||
if (imm >= 0 && imm < 32768)
|
||||
/* PPC_ANDI is _only/always_ dot-form */
|
||||
PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
|
||||
else {
|
||||
PPC_LI32(b2p[TMP_REG_1], imm);
|
||||
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
|
||||
b2p[TMP_REG_1]);
|
||||
int tmp_reg = b2p[TMP_REG_1];
|
||||
|
||||
PPC_LI32(tmp_reg, imm);
|
||||
if (BPF_CLASS(code) == BPF_JMP) {
|
||||
PPC_AND_DOT(tmp_reg, dst_reg,
|
||||
tmp_reg);
|
||||
} else {
|
||||
PPC_AND(tmp_reg, dst_reg,
|
||||
tmp_reg);
|
||||
PPC_RLWINM_DOT(tmp_reg, tmp_reg,
|
||||
0, 0, 31);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@@ -70,6 +70,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};

u64 perf_reg_value(struct pt_regs *regs, int idx)

@@ -83,6 +84,11 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
!is_sier_available()))
return 0;

if (idx == PERF_REG_POWERPC_MMCRA &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
IS_ENABLED(CONFIG_PPC32)))
return 0;

return regs_get_register(regs, pt_regs_offset[idx]);
}

@@ -237,12 +237,12 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
continue;

seq_printf(m, "PPC4XX OCM : %d\n", ocm->index);
seq_printf(m, "PhysAddr : %pa[p]\n", &(ocm->phys));
seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys));
seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);

seq_printf(m, "NC.PhysAddr : %pa[p]\n", &(ocm->nc.phys));
seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys));
seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree);

@@ -252,7 +252,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}

seq_printf(m, "\nC.PhysAddr : %pa[p]\n", &(ocm->c.phys));
seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys));
seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);

@@ -538,8 +538,7 @@ static void __init chrp_init_IRQ(void)
/* see if there is a keyboard in the device tree
with a parent of type "adb" */
for_each_node_by_name(kbd, "keyboard")
if (kbd->parent && kbd->parent->type
&& strcmp(kbd->parent->type, "adb") == 0)
if (of_node_is_type(kbd->parent, "adb"))
break;
of_node_put(kbd);
if (kbd)

@@ -564,7 +564,7 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
}
} else {
/* Create a group for 1 GPU and attached NPUs for POWER8 */
pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
table_group = &pe->npucomp->table_group;
table_group->ops = &pnv_npu_peers_ops;
iommu_register_group(table_group, hose->global_number,

@@ -2681,7 +2681,8 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;

if (phb->type == PNV_PHB_NPU_NVLINK)
if (phb->type == PNV_PHB_NPU_NVLINK ||
phb->type == PNV_PHB_NPU_OCAPI)
continue;

list_for_each_entry(pe, &phb->ioda.pe_list, list) {

@@ -264,7 +264,9 @@ void __init pSeries_final_fixup(void)
if (!of_device_is_compatible(nvdn->parent,
"ibm,power9-npu"))
continue;
#ifdef CONFIG_PPC_POWERNV
WARN_ON_ONCE(pnv_npu2_init(hose));
#endif
break;
}
}

@@ -25,7 +25,7 @@ static inline int init_new_context(struct task_struct *tsk,
atomic_set(&mm->context.flush_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||

@@ -90,8 +90,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();

if (prev == next)
return;
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR1 and CR7 */

@@ -103,7 +101,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}

#define finish_arch_post_lock_switch finish_arch_post_lock_switch

@@ -63,10 +63,10 @@ static noinline __init void detect_machine_type(void)
if (stsi(vmms, 3, 2, 2) || !vmms->count)
return;

/* Running under KVM? If not we assume z/VM */
/* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
else
else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}

@@ -1006,6 +1006,8 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running under KVM in 64-bit mode\n");
else if (MACHINE_IS_LPAR)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");

/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */

@@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = pcpu_devices->lowcore;

if (pcpu_devices[0].address == stap())
lc = &S390_lowcore;

pcpu_delegate(&pcpu_devices[0], func, data,
pcpu_devices->lowcore->nodat_stack);
lc->nodat_stack);
}

int smp_find_processor_id(u16 address)

@@ -1166,7 +1171,11 @@ static ssize_t __ref rescan_store(struct device *dev,
{
int rc;

rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
rc = smp_rescan_cpus();
unlock_device_hotplug();
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);

@@ -224,10 +224,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)

vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
mm->context.compat_mm = is_compat_task();
if (mm->context.compat_mm)
vdso_pages = vdso32_pages;
mm->context.compat_mm = 1;
}
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for

@ -1110,103 +1110,141 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
|
|||
mask = 0xf000; /* j */
|
||||
goto branch_oc;
|
||||
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
|
||||
mask = 0x2000; /* jh */
|
||||
goto branch_ks;
|
||||
case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
|
||||
mask = 0x4000; /* jl */
|
||||
goto branch_ks;
|
||||
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
|
||||
mask = 0xa000; /* jhe */
|
||||
goto branch_ks;
|
||||
case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
|
||||
mask = 0xc000; /* jle */
|
||||
goto branch_ks;
|
||||
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
|
||||
case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
|
||||
mask = 0x2000; /* jh */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
|
||||
case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
|
||||
mask = 0x4000; /* jl */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
|
||||
case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
|
||||
mask = 0xa000; /* jhe */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
|
||||
case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
|
||||
mask = 0xc000; /* jle */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
|
||||
case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
|
||||
mask = 0x7000; /* jne */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
|
||||
mask = 0x8000; /* je */
|
||||
goto branch_ku;
|
||||
case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
|
||||
case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
|
||||
mask = 0x7000; /* jnz */
|
||||
/* lgfi %w1,imm (load sign extend imm) */
|
||||
EMIT6_IMM(0xc0010000, REG_W1, imm);
|
||||
/* ngr %w1,%dst */
|
||||
EMIT4(0xb9800000, REG_W1, dst_reg);
|
||||
if (BPF_CLASS(insn->code) == BPF_JMP32) {
|
||||
/* llilf %w1,imm (load zero extend imm) */
|
||||
EMIT6_IMM(0xc0010000, REG_W1, imm);
|
||||
/* nr %w1,%dst */
|
||||
EMIT2(0x1400, REG_W1, dst_reg);
|
||||
} else {
|
||||
/* lgfi %w1,imm (load sign extend imm) */
|
||||
EMIT6_IMM(0xc0010000, REG_W1, imm);
|
||||
/* ngr %w1,%dst */
|
||||
EMIT4(0xb9800000, REG_W1, dst_reg);
|
||||
}
|
||||
goto branch_oc;
|
||||
|
||||
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
|
||||
case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
|
||||
mask = 0x2000; /* jh */
|
||||
goto branch_xs;
|
||||
case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
|
||||
case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
|
||||
mask = 0x4000; /* jl */
|
||||
goto branch_xs;
|
||||
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
|
||||
case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
|
||||
mask = 0xa000; /* jhe */
|
||||
goto branch_xs;
|
||||
case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
|
||||
case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
|
||||
mask = 0xc000; /* jle */
|
||||
goto branch_xs;
|
||||
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
|
||||
case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
|
||||
mask = 0x2000; /* jh */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
|
||||
case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
|
||||
mask = 0x4000; /* jl */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
|
||||
case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
|
||||
mask = 0xa000; /* jhe */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
|
||||
case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
|
||||
mask = 0xc000; /* jle */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
|
||||
case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
|
||||
mask = 0x7000; /* jne */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
|
||||
case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
|
||||
mask = 0x8000; /* je */
|
||||
goto branch_xu;
|
||||
case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
|
||||
case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
|
||||
{
|
||||
bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
|
||||
|
||||
mask = 0x7000; /* jnz */
|
||||
/* ngrk %w1,%dst,%src */
|
||||
EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
|
||||
/* nrk or ngrk %w1,%dst,%src */
|
||||
EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
|
||||
REG_W1, dst_reg, src_reg);
|
||||
goto branch_oc;
|
||||
branch_ks:
|
||||
/* lgfi %w1,imm (load sign extend imm) */
|
||||
EMIT6_IMM(0xc0010000, REG_W1, imm);
|
||||
/* cgrj %dst,%w1,mask,off */
|
||||
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
|
||||
/* crj or cgrj %dst,%w1,mask,off */
|
||||
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
|
||||
dst_reg, REG_W1, i, off, mask);
|
||||
break;
|
||||
branch_ku:
|
||||
/* lgfi %w1,imm (load sign extend imm) */
|
||||
EMIT6_IMM(0xc0010000, REG_W1, imm);
|
||||
/* clgrj %dst,%w1,mask,off */
|
||||
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
|
||||
/* clrj or clgrj %dst,%w1,mask,off */
|
||||
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
|
||||
dst_reg, REG_W1, i, off, mask);
|
||||
break;
|
||||
branch_xs:
|
||||
/* cgrj %dst,%src,mask,off */
|
||||
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
|
||||
/* crj or cgrj %dst,%src,mask,off */
|
||||
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
|
||||
dst_reg, src_reg, i, off, mask);
|
||||
break;
|
||||
branch_xu:
|
||||
/* clgrj %dst,%src,mask,off */
|
||||
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
|
||||
/* clrj or clgrj %dst,%src,mask,off */
|
||||
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
|
||||
dst_reg, src_reg, i, off, mask);
|
||||
break;
|
||||
branch_oc:
|
||||
/* brc mask,jmp_off (branch instruction needs 4 bytes) */
|
||||
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
|
||||
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
|
||||
break;
|
||||
}
|
||||
default: /* too complex, give up */
|
||||
pr_err("Unknown opcode %02x\n", insn->code);
|
||||
return -1;
|
||||
|
|
|
@@ -198,7 +198,7 @@ config X86
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB

@@ -617,7 +617,7 @@ config X86_INTEL_QUARK

config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
depends on X86 && ACPI && PCI
select COMMON_CLK
select PINCTRL
select IOSF_MBI

@@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat)

/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

pushq 6*8(%rdi) /* regs->ss */

@@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat)
pushq 3*8(%rdi) /* regs->cs */
pushq 2*8(%rdi) /* regs->ip */
pushq 1*8(%rdi) /* regs->orig_ax */

pushq (%rdi) /* pt_regs->di */
.Lint80_keep_stack:

pushq %rsi /* pt_regs->si */
xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */

@@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)

void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
*/
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{

@@ -228,8 +232,22 @@ do { \
} while (0)
#endif

static inline void arch_dup_pkeys(struct mm_struct *oldmm,
struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;

/* Duplicate the oldmm pkey state in mm: */
mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
#endif
}

static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_arch_dup_mmap(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}

@@ -711,7 +711,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
{
if (unlikely(!access_ok(ptr,len)))
return 0;
__uaccess_begin();
__uaccess_begin_nospec();
return 1;
}
#define user_access_begin(a,b) user_access_begin(a,b)

@@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image)

kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);

@@ -21,10 +21,6 @@

#define HPET_MASK CLOCKSOURCE_MASK(32)

/* FSEC = 10^-15
NSEC = 10^-9 */
#define FSEC_PER_NSEC 1000000L

#define HPET_DEV_USED_BIT 2
#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
#define HPET_DEV_VALID 0x8

@@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;

@@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;

@@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
#else
u64 ipi_bitmap = 0;
#endif
long ret;

if (cpumask_empty(mask))
return;

@@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
min = max = apic_id;
ipi_bitmap = 0;
}

@@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
}

if (ipi_bitmap) {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
}

local_irq_restore(flags);

@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
|
|||
|
||||
__setup("tsc=", tsc_setup);
|
||||
|
||||
#define MAX_RETRIES 5
|
||||
#define SMI_TRESHOLD 50000
|
||||
#define MAX_RETRIES 5
|
||||
#define TSC_DEFAULT_THRESHOLD 0x20000
|
||||
|
||||
/*
|
||||
* Read TSC and the reference counters. Take care of SMI disturbance
|
||||
* Read TSC and the reference counters. Take care of any disturbances
|
||||
*/
|
||||
static u64 tsc_read_refs(u64 *p, int hpet)
|
||||
{
|
||||
u64 t1, t2;
|
||||
u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MAX_RETRIES; i++) {
|
||||
|
@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
|
|||
else
|
||||
*p = acpi_pm_read_early();
|
||||
t2 = get_cycles();
|
||||
if ((t2 - t1) < SMI_TRESHOLD)
|
||||
if ((t2 - t1) < thresh)
|
||||
return t2;
|
||||
}
|
||||
return ULLONG_MAX;
|
||||
|
@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
|
|||
* zero. In each wait loop iteration we read the TSC and check
|
||||
* the delta to the previous read. We keep track of the min
|
||||
* and max values of that delta. The delta is mostly defined
|
||||
* by the IO time of the PIT access, so we can detect when a
|
||||
* SMI/SMM disturbance happened between the two reads. If the
|
||||
* by the IO time of the PIT access, so we can detect when
|
||||
* any disturbance happened between the two reads. If the
|
||||
* maximum time is significantly larger than the minimum time,
|
||||
* then we discard the result and have another try.
|
||||
*
|
||||
* 2) Reference counter. If available we use the HPET or the
|
||||
* PMTIMER as a reference to check the sanity of that value.
|
||||
* We use separate TSC readouts and check inside of the
|
||||
* reference read for a SMI/SMM disturbance. We dicard
|
||||
* reference read for any possible disturbance. We dicard
|
||||
* disturbed values here as well. We do that around the PIT
|
||||
* calibration delay loop as we have to wait for a certain
|
||||
* amount of time anyway.
|
||||
|
@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
|
|||
if (ref1 == ref2)
|
||||
continue;
|
||||
|
||||
/* Check, whether the sampling was disturbed by an SMI */
|
||||
/* Check, whether the sampling was disturbed */
|
||||
if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
|
||||
continue;
|
||||
|
||||
|
@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
|
|||
*/
|
||||
static void tsc_refine_calibration_work(struct work_struct *work)
|
||||
{
|
||||
static u64 tsc_start = -1, ref_start;
|
||||
static u64 tsc_start = ULLONG_MAX, ref_start;
|
||||
static int hpet;
|
||||
u64 tsc_stop, ref_stop, delta;
|
||||
unsigned long freq;
|
||||
|
@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
|
|||
* delayed the first time we expire. So set the workqueue
|
||||
* again once we know timers are working.
|
||||
*/
|
||||
if (tsc_start == -1) {
|
||||
if (tsc_start == ULLONG_MAX) {
|
||||
restart:
|
||||
/*
|
||||
* Only set hpet once, to avoid mixing hardware
|
||||
* if the hpet becomes enabled later.
|
||||
*/
|
||||
hpet = is_hpet_enabled();
|
||||
schedule_delayed_work(&tsc_irqwork, HZ);
|
||||
tsc_start = tsc_read_refs(&ref_start, hpet);
|
||||
schedule_delayed_work(&tsc_irqwork, HZ);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
|
|||
if (ref_start == ref_stop)
|
||||
goto out;
|
||||
|
||||
/* Check, whether the sampling was disturbed by an SMI */
|
||||
if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
|
||||
goto out;
|
||||
/* Check, whether the sampling was disturbed */
|
||||
if (tsc_stop == ULLONG_MAX)
|
||||
goto restart;
|
||||
|
||||
delta = tsc_stop - tsc_start;
|
||||
delta *= 1000000LL;
|
||||
|
|
|
@@ -2,10 +2,6 @@

ccflags-y += -Iarch/x86/kvm

CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.

KVM := ../../../virt/kvm

kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \

@@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
/* maybe userspace knows this conn_id: fall through */
/* fall through - maybe userspace knows this conn_id. */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {

@@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;

@@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_ENLIGHTMENT_INFO:
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
if (evmcs_ver)
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;

/*
* Default number of spinlock retry attempts, matches

@@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
/* fall through */
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;

@@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)

case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
/* fall through */
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:

@@ -4371,6 +4371,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
/* fall through */
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |

@@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);

/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
* doesn't end up in L1.
*/
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);

return 0;
}

@@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
/* Follow through */
/* Fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}

@@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
int i;
struct kvm_vcpu *vcpu;
struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;

/*
* At this point, we expect that the AVIC HW has already
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
* Update ICR high and low, then emulate sending IPI,
* which is handled when writing APIC_ICR.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
icrl & KVM_APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
icrl & KVM_APIC_DEST_MASK);

if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
}
kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
index, svm->vcpu.vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");

@@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
#endif /* _TRACE_KVM_H */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

@@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;

vmx->nested.enlightened_vmcs_enabled = true;

if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);

/* We don't support disabling the feature for simplicity. */
if (vmx->nested.enlightened_vmcs_enabled)
if (evmcs_already_enabled)
return 0;

vmx->nested.enlightened_vmcs_enabled = true;

vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;