ock: sync codes to ock 5.4.119-20.0009.21
Gitee limit the repo's size to 3GB, to reduce the size of the code, sync codes to ock 5.4.119-20.0009.21 in one commit. Signed-off-by: Jianping Liu <frankjpliu@tencent.com>
This commit is contained in:
parent
be16237b31
commit
c62d6b571d
|
@ -1,2 +1,3 @@
|
|||
*.c diff=cpp
|
||||
*.h diff=cpp
|
||||
dist/ export-ignore
|
||||
|
|
|
@ -146,3 +146,7 @@ x509.genkey
|
|||
|
||||
# Clang's compilation database file
|
||||
/compile_commands.json
|
||||
|
||||
# Tencent dist files
|
||||
/dist/rpm
|
||||
/dist/workdir
|
||||
|
|
|
@ -1566,7 +1566,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
|
|||
KernelVersion: 4.3
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Raw (unscaled no offset etc.) percentage reading of a substance.
|
||||
Raw (unscaled no offset etc.) reading of a substance. Units
|
||||
after application of scale and offset are percents.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw
|
||||
|
|
|
@ -196,6 +196,12 @@ Description:
|
|||
does not reflect it. Likewise, if one enables a deep state but a
|
||||
lighter state still is disabled, then this has no effect.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/default_status
|
||||
Date: December 2019
|
||||
KernelVersion: v5.6
|
||||
Contact: Linux power management list <linux-pm@vger.kernel.org>
|
||||
Description:
|
||||
(RO) The default status of this state, "enabled" or "disabled".
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
|
||||
Date: March 2014
|
||||
|
@ -486,6 +492,7 @@ What: /sys/devices/system/cpu/vulnerabilities
|
|||
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
|
||||
/sys/devices/system/cpu/vulnerabilities/l1tf
|
||||
/sys/devices/system/cpu/vulnerabilities/mds
|
||||
/sys/devices/system/cpu/vulnerabilities/srbds
|
||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||
Date: January 2018
|
||||
|
|
|
@ -177,6 +177,12 @@ bitmap_flush_interval:number
|
|||
The bitmap flush interval in milliseconds. The metadata buffers
|
||||
are synchronized when this interval expires.
|
||||
|
||||
legacy_recalculate
|
||||
Allow recalculating of volumes with HMAC keys. This is disabled by
|
||||
default for security reasons - an attacker could modify the volume,
|
||||
set recalc_sector to zero, and the kernel would not detect the
|
||||
modification.
|
||||
|
||||
|
||||
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
|
||||
be changed when reloading the target (load an inactive table and swap the
|
||||
|
|
|
@ -14,3 +14,4 @@ are configurable at compile, boot or run time.
|
|||
mds
|
||||
tsx_async_abort
|
||||
multihit.rst
|
||||
special-register-buffer-data-sampling.rst
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
SRBDS - Special Register Buffer Data Sampling
|
||||
=============================================
|
||||
|
||||
SRBDS is a hardware vulnerability that allows MDS :doc:`mds` techniques to
|
||||
infer values returned from special register accesses. Special register
|
||||
accesses are accesses to off core registers. According to Intel's evaluation,
|
||||
the special register reads that have a security expectation of privacy are
|
||||
RDRAND, RDSEED and SGX EGETKEY.
|
||||
|
||||
When RDRAND, RDSEED and EGETKEY instructions are used, the data is moved
|
||||
to the core through the special register mechanism that is susceptible
|
||||
to MDS attacks.
|
||||
|
||||
Affected processors
|
||||
--------------------
|
||||
Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED may
|
||||
be affected.
|
||||
|
||||
A processor is affected by SRBDS if its Family_Model and stepping is
|
||||
in the following list, with the exception of the listed processors
|
||||
exporting MDS_NO while Intel TSX is available yet not enabled. The
|
||||
latter class of processors are only affected when Intel TSX is enabled
|
||||
by software using TSX_CTRL_MSR otherwise they are not affected.
|
||||
|
||||
============= ============ ========
|
||||
common name Family_Model Stepping
|
||||
============= ============ ========
|
||||
IvyBridge 06_3AH All
|
||||
|
||||
Haswell 06_3CH All
|
||||
Haswell_L 06_45H All
|
||||
Haswell_G 06_46H All
|
||||
|
||||
Broadwell_G 06_47H All
|
||||
Broadwell 06_3DH All
|
||||
|
||||
Skylake_L 06_4EH All
|
||||
Skylake 06_5EH All
|
||||
|
||||
Kabylake_L 06_8EH <= 0xC
|
||||
Kabylake 06_9EH <= 0xD
|
||||
============= ============ ========
|
||||
|
||||
Related CVEs
|
||||
------------
|
||||
|
||||
The following CVE entry is related to this SRBDS issue:
|
||||
|
||||
============== ===== =====================================
|
||||
CVE-2020-0543 SRBDS Special Register Buffer Data Sampling
|
||||
============== ===== =====================================
|
||||
|
||||
Attack scenarios
|
||||
----------------
|
||||
An unprivileged user can extract values returned from RDRAND and RDSEED
|
||||
executed on another core or sibling thread using MDS techniques.
|
||||
|
||||
|
||||
Mitigation mechanism
|
||||
-------------------
|
||||
Intel will release microcode updates that modify the RDRAND, RDSEED, and
|
||||
EGETKEY instructions to overwrite secret special register data in the shared
|
||||
staging buffer before the secret data can be accessed by another logical
|
||||
processor.
|
||||
|
||||
During execution of the RDRAND, RDSEED, or EGETKEY instructions, off-core
|
||||
accesses from other logical processors will be delayed until the special
|
||||
register read is complete and the secret data in the shared staging buffer is
|
||||
overwritten.
|
||||
|
||||
This has three effects on performance:
|
||||
|
||||
#. RDRAND, RDSEED, or EGETKEY instructions have higher latency.
|
||||
|
||||
#. Executing RDRAND at the same time on multiple logical processors will be
|
||||
serialized, resulting in an overall reduction in the maximum RDRAND
|
||||
bandwidth.
|
||||
|
||||
#. Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other
|
||||
logical processors that miss their core caches, with an impact similar to
|
||||
legacy locked cache-line-split accesses.
|
||||
|
||||
The microcode updates provide an opt-out mechanism (RNGDS_MITG_DIS) to disable
|
||||
the mitigation for RDRAND and RDSEED instructions executed outside of Intel
|
||||
Software Guard Extensions (Intel SGX) enclaves. On logical processors that
|
||||
disable the mitigation using this opt-out mechanism, RDRAND and RDSEED do not
|
||||
take longer to execute and do not impact performance of sibling logical
|
||||
processors memory accesses. The opt-out mechanism does not affect Intel SGX
|
||||
enclaves (including execution of RDRAND or RDSEED inside an enclave, as well
|
||||
as EGETKEY execution).
|
||||
|
||||
IA32_MCU_OPT_CTRL MSR Definition
|
||||
--------------------------------
|
||||
Along with the mitigation for this issue, Intel added a new thread-scope
|
||||
IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and
|
||||
RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL =
|
||||
9]==1. This MSR is introduced through the microcode update.
|
||||
|
||||
Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor
|
||||
disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX
|
||||
enclave on that logical processor. Opting out of the mitigation for a
|
||||
particular logical processor does not affect the RDRAND and RDSEED mitigations
|
||||
for other logical processors.
|
||||
|
||||
Note that inside of an Intel SGX enclave, the mitigation is applied regardless
|
||||
of the value of RNGDS_MITG_DS.
|
||||
|
||||
Mitigation control on the kernel command line
|
||||
---------------------------------------------
|
||||
The kernel command line allows control over the SRBDS mitigation at boot time
|
||||
with the option "srbds=". The option for this is:
|
||||
|
||||
============= =============================================================
|
||||
off This option disables SRBDS mitigation for RDRAND and RDSEED on
|
||||
affected platforms.
|
||||
============= =============================================================
|
||||
|
||||
SRBDS System Information
|
||||
-----------------------
|
||||
The Linux kernel provides vulnerability status information through sysfs. For
|
||||
SRBDS this can be accessed by the following sysfs file:
|
||||
/sys/devices/system/cpu/vulnerabilities/srbds
|
||||
|
||||
The possible values contained in this file are:
|
||||
|
||||
============================== =============================================
|
||||
Not affected Processor not vulnerable
|
||||
Vulnerable Processor vulnerable and mitigation disabled
|
||||
Vulnerable: No microcode Processor vulnerable and microcode is missing
|
||||
mitigation
|
||||
Mitigation: Microcode Processor is vulnerable and mitigation is in
|
||||
effect.
|
||||
Mitigation: TSX disabled Processor is only vulnerable when TSX is
|
||||
enabled while this system was booted with TSX
|
||||
disabled.
|
||||
Unknown: Dependent on
|
||||
hypervisor status Running on virtual guest processor that is
|
||||
affected but with no way to know if host
|
||||
processor is mitigated or vulnerable.
|
||||
============================== =============================================
|
||||
|
||||
SRBDS Default mitigation
|
||||
------------------------
|
||||
This new microcode serializes processor access during execution of RDRAND,
|
||||
RDSEED ensures that the shared buffer is overwritten before it is released for
|
||||
reuse. Use the "srbds=off" kernel command line to disable the mitigation for
|
||||
RDRAND and RDSEED.
|
|
@ -99,7 +99,7 @@ Field 10 -- # of milliseconds spent doing I/Os
|
|||
|
||||
Since 5.0 this field counts jiffies when at least one request was
|
||||
started or completed. If request runs more than 2 jiffies then some
|
||||
I/O time will not be accounted unless there are other requests.
|
||||
I/O time might be not accounted in case of concurrent requests.
|
||||
|
||||
Field 11 -- weighted # of milliseconds spent doing I/Os
|
||||
This field is incremented at each I/O start, I/O completion, I/O
|
||||
|
@ -133,6 +133,9 @@ are summed (possibly overflowing the unsigned long variable they are
|
|||
summed to) and the result given to the user. There is no convenient
|
||||
user interface for accessing the per-CPU counters themselves.
|
||||
|
||||
Since 4.19 request times are measured with nanoseconds precision and
|
||||
truncated to milliseconds before showing in this interface.
|
||||
|
||||
Disks vs Partitions
|
||||
-------------------
|
||||
|
||||
|
|
|
@ -567,7 +567,7 @@
|
|||
loops can be debugged more effectively on production
|
||||
systems.
|
||||
|
||||
clearcpuid=BITNUM [X86]
|
||||
clearcpuid=BITNUM[,BITNUM...] [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
arch/x86/include/asm/cpufeatures.h for the valid bit
|
||||
numbers. Note the Linux specific bits are not necessarily
|
||||
|
@ -2667,6 +2667,8 @@
|
|||
mds=off [X86]
|
||||
tsx_async_abort=off [X86]
|
||||
kvm.nx_huge_pages=off [X86]
|
||||
no_entry_flush [PPC]
|
||||
no_uaccess_flush [PPC]
|
||||
|
||||
Exceptions:
|
||||
This does not have any effect on
|
||||
|
@ -2741,7 +2743,7 @@
|
|||
<name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
|
||||
|
||||
mtdparts= [MTD]
|
||||
See drivers/mtd/cmdlinepart.c.
|
||||
See drivers/mtd/parsers/cmdlinepart.c
|
||||
|
||||
multitce=off [PPC] This parameter disables the use of the pSeries
|
||||
firmware feature for updating multiple TCE entries
|
||||
|
@ -2936,6 +2938,8 @@
|
|||
no5lvl [X86-64] Disable 5-level paging mode. Forces
|
||||
kernel to use 4-level paging instead.
|
||||
|
||||
nofsgsbase [X86] Disables FSGSBASE instructions.
|
||||
|
||||
no_console_suspend
|
||||
[HW] Never suspend the console
|
||||
Disable suspending of consoles during suspend and
|
||||
|
@ -2989,6 +2993,8 @@
|
|||
|
||||
noefi Disable EFI runtime services support.
|
||||
|
||||
no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel.
|
||||
|
||||
noexec [IA-64]
|
||||
|
||||
noexec [X86]
|
||||
|
@ -3038,6 +3044,9 @@
|
|||
nospec_store_bypass_disable
|
||||
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
|
||||
|
||||
no_uaccess_flush
|
||||
[PPC] Don't flush the L1-D cache after accessing user data.
|
||||
|
||||
noxsave [BUGS=X86] Disables x86 extended register state save
|
||||
and restore using xsave. The kernel will fallback to
|
||||
enabling legacy floating-point and sse state.
|
||||
|
@ -3171,6 +3180,8 @@
|
|||
|
||||
nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
|
||||
|
||||
nosgx [X86-64,SGX] Disables Intel SGX kernel support.
|
||||
|
||||
nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
|
||||
and disable the IO APIC. legacy for "maxcpus=0".
|
||||
|
||||
|
@ -3577,6 +3588,8 @@
|
|||
even if the platform doesn't give the OS permission to
|
||||
use them. This may cause conflicts if the platform
|
||||
also tries to use these services.
|
||||
dpc-native Use native PCIe service for DPC only. May
|
||||
cause conflicts if firmware uses AER or DPC.
|
||||
compat Disable native PCIe services (PME, AER, DPC, PCIe
|
||||
hotplug).
|
||||
|
||||
|
@ -3771,6 +3784,14 @@
|
|||
[KNL] Number of legacy pty's. Overwrites compiled-in
|
||||
default number.
|
||||
|
||||
qspinlock.numa_spinlock_threshold_ns= [NUMA, PV_OPS]
|
||||
Set the time threshold in nanoseconds for the
|
||||
number of intra-node lock hand-offs before the
|
||||
NUMA-aware spinlock is forced to be passed to
|
||||
a thread on another NUMA node. Smaller values
|
||||
result in a more fair, but less performant spinlock,
|
||||
and vice versa. The default value is 1000000 (=1ms).
|
||||
|
||||
quiet [KNL] Disable most log messages
|
||||
|
||||
r128= [HW,DRM]
|
||||
|
@ -4579,6 +4600,26 @@
|
|||
spia_pedr=
|
||||
spia_peddr=
|
||||
|
||||
srbds= [X86,INTEL]
|
||||
Control the Special Register Buffer Data Sampling
|
||||
(SRBDS) mitigation.
|
||||
|
||||
Certain CPUs are vulnerable to an MDS-like
|
||||
exploit which can leak bits from the random
|
||||
number generator.
|
||||
|
||||
By default, this issue is mitigated by
|
||||
microcode. However, the microcode fix can cause
|
||||
the RDRAND and RDSEED instructions to become
|
||||
much slower. Among other effects, this will
|
||||
result in reduced throughput from /dev/urandom.
|
||||
|
||||
The microcode mitigation can be disabled with
|
||||
the following option:
|
||||
|
||||
off: Disable mitigation and remove
|
||||
performance impact to RDRAND and RDSEED
|
||||
|
||||
srcutree.counter_wrap_check [KNL]
|
||||
Specifies how frequently to check for
|
||||
grace-period sequence counter wrap for the
|
||||
|
@ -5005,8 +5046,7 @@
|
|||
|
||||
usbcore.old_scheme_first=
|
||||
[USB] Start with the old device initialization
|
||||
scheme, applies only to low and full-speed devices
|
||||
(default 0 = off).
|
||||
scheme (default 0 = off).
|
||||
|
||||
usbcore.usbfs_memory_mb=
|
||||
[USB] Memory limit (in MB) for buffers allocated by
|
||||
|
@ -5125,6 +5165,7 @@
|
|||
device);
|
||||
j = NO_REPORT_LUNS (don't use report luns
|
||||
command, uas only);
|
||||
k = NO_SAME (do not use WRITE_SAME, uas only)
|
||||
l = NOT_LOCKABLE (don't try to lock and
|
||||
unlock ejectable media, not on uas);
|
||||
m = MAX_SECTORS_64 (don't transfer more
|
||||
|
@ -5425,6 +5466,10 @@
|
|||
This option is obsoleted by the "nopv" option, which
|
||||
has equivalent effect for XEN platform.
|
||||
|
||||
xen_no_vector_callback
|
||||
[KNL,X86,XEN] Disable the vector callback for Xen
|
||||
event channel interrupts.
|
||||
|
||||
xen_scrub_pages= [XEN]
|
||||
Boolean option to control scrubbing pages before giving them back
|
||||
to Xen, for use by other domains. Can be also changed at runtime
|
||||
|
@ -5443,6 +5488,18 @@
|
|||
as generic guest with no PV drivers. Currently support
|
||||
XEN HVM, KVM, HYPER_V and VMWARE guest.
|
||||
|
||||
xen.event_eoi_delay= [XEN]
|
||||
How long to delay EOI handling in case of event
|
||||
storms (jiffies). Default is 10.
|
||||
|
||||
xen.event_loop_timeout= [XEN]
|
||||
After which time (jiffies) the event handling loop
|
||||
should start to delay EOI handling. Default is 2.
|
||||
nopvspin [X86,XEN,KVM]
|
||||
Disables the qspinlock slow path using PV optimizations
|
||||
which allow the hypervisor to 'idle' the guest on lock
|
||||
contention.
|
||||
|
||||
xirc2ps_cs= [NET,PCMCIA]
|
||||
Format:
|
||||
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
|
||||
|
|
|
@ -506,6 +506,9 @@ object corresponding to it, as follows:
|
|||
``disable``
|
||||
Whether or not this idle state is disabled.
|
||||
|
||||
``default_status``
|
||||
The default status of this state, "enabled" or "disabled".
|
||||
|
||||
``latency``
|
||||
Exit latency of the idle state in microseconds.
|
||||
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
==============================================
|
||||
``intel_idle`` CPU Idle Time Management Driver
|
||||
==============================================
|
||||
|
||||
:Copyright: |copy| 2020 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
General Information
|
||||
===================
|
||||
|
||||
``intel_idle`` is a part of the
|
||||
:doc:`CPU idle time management subsystem <cpuidle>` in the Linux kernel
|
||||
(``CPUIdle``). It is the default CPU idle time management driver for the
|
||||
Nehalem and later generations of Intel processors, but the level of support for
|
||||
a particular processor model in it depends on whether or not it recognizes that
|
||||
processor model and may also depend on information coming from the platform
|
||||
firmware. [To understand ``intel_idle`` it is necessary to know how ``CPUIdle``
|
||||
works in general, so this is the time to get familiar with :doc:`cpuidle` if you
|
||||
have not done that yet.]
|
||||
|
||||
``intel_idle`` uses the ``MWAIT`` instruction to inform the processor that the
|
||||
logical CPU executing it is idle and so it may be possible to put some of the
|
||||
processor's functional blocks into low-power states. That instruction takes two
|
||||
arguments (passed in the ``EAX`` and ``ECX`` registers of the target CPU), the
|
||||
first of which, referred to as a *hint*, can be used by the processor to
|
||||
determine what can be done (for details refer to Intel Software Developer’s
|
||||
Manual [1]_). Accordingly, ``intel_idle`` refuses to work with processors in
|
||||
which the support for the ``MWAIT`` instruction has been disabled (for example,
|
||||
via the platform firmware configuration menu) or which do not support that
|
||||
instruction at all.
|
||||
|
||||
``intel_idle`` is not modular, so it cannot be unloaded, which means that the
|
||||
only way to pass early-configuration-time parameters to it is via the kernel
|
||||
command line.
|
||||
|
||||
|
||||
.. _intel-idle-enumeration-of-states:
|
||||
|
||||
Enumeration of Idle States
|
||||
==========================
|
||||
|
||||
Each ``MWAIT`` hint value is interpreted by the processor as a license to
|
||||
reconfigure itself in a certain way in order to save energy. The processor
|
||||
configurations (with reduced power draw) resulting from that are referred to
|
||||
as C-states (in the ACPI terminology) or idle states. The list of meaningful
|
||||
``MWAIT`` hint values and idle states (i.e. low-power configurations of the
|
||||
processor) corresponding to them depends on the processor model and it may also
|
||||
depend on the configuration of the platform.
|
||||
|
||||
In order to create a list of available idle states required by the ``CPUIdle``
|
||||
subsystem (see :ref:`idle-states-representation` in :doc:`cpuidle`),
|
||||
``intel_idle`` can use two sources of information: static tables of idle states
|
||||
for different processor models included in the driver itself and the ACPI tables
|
||||
of the system. The former are always used if the processor model at hand is
|
||||
recognized by ``intel_idle`` and the latter are used if that is required for
|
||||
the given processor model (which is the case for all server processor models
|
||||
recognized by ``intel_idle``) or if the processor model is not recognized.
|
||||
|
||||
If the ACPI tables are going to be used for building the list of available idle
|
||||
states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI
|
||||
objects corresponding to the CPUs in the system (refer to the ACPI specification
|
||||
[2]_ for the description of ``_CST`` and its output package). Because the
|
||||
``CPUIdle`` subsystem expects that the list of idle states supplied by the
|
||||
driver will be suitable for all of the CPUs handled by it and ``intel_idle`` is
|
||||
registered as the ``CPUIdle`` driver for all of the CPUs in the system, the
|
||||
driver looks for the first ``_CST`` object returning at least one valid idle
|
||||
state description and such that all of the idle states included in its return
|
||||
package are of the FFH (Functional Fixed Hardware) type, which means that the
|
||||
``MWAIT`` instruction is expected to be used to tell the processor that it can
|
||||
enter one of them. The return package of that ``_CST`` is then assumed to be
|
||||
applicable to all of the other CPUs in the system and the idle state
|
||||
descriptions extracted from it are stored in a preliminary list of idle states
|
||||
coming from the ACPI tables. [This step is skipped if ``intel_idle`` is
|
||||
configured to ignore the ACPI tables; see `below <intel-idle-parameters_>`_.]
|
||||
|
||||
Next, the first (index 0) entry in the list of available idle states is
|
||||
initialized to represent a "polling idle state" (a pseudo-idle state in which
|
||||
the target CPU continuously fetches and executes instructions), and the
|
||||
subsequent (real) idle state entries are populated as follows.
|
||||
|
||||
If the processor model at hand is recognized by ``intel_idle``, there is a
|
||||
(static) table of idle state descriptions for it in the driver. In that case,
|
||||
the "internal" table is the primary source of information on idle states and the
|
||||
information from it is copied to the final list of available idle states. If
|
||||
using the ACPI tables for the enumeration of idle states is not required
|
||||
(depending on the processor model), all of the listed idle state are enabled by
|
||||
default (so all of them will be taken into consideration by ``CPUIdle``
|
||||
governors during CPU idle state selection). Otherwise, some of the listed idle
|
||||
states may not be enabled by default if there are no matching entries in the
|
||||
preliminary list of idle states coming from the ACPI tables. In that case user
|
||||
space still can enable them later (on a per-CPU basis) with the help of
|
||||
the ``disable`` idle state attribute in ``sysfs`` (see
|
||||
:ref:`idle-states-representation` in :doc:`cpuidle`). This basically means that
|
||||
the idle states "known" to the driver may not be enabled by default if they have
|
||||
not been exposed by the platform firmware (through the ACPI tables).
|
||||
|
||||
If the given processor model is not recognized by ``intel_idle``, but it
|
||||
supports ``MWAIT``, the preliminary list of idle states coming from the ACPI
|
||||
tables is used for building the final list that will be supplied to the
|
||||
``CPUIdle`` core during driver registration. For each idle state in that list,
|
||||
the description, ``MWAIT`` hint and exit latency are copied to the corresponding
|
||||
entry in the final list of idle states. The name of the idle state represented
|
||||
by it (to be returned by the ``name`` idle state attribute in ``sysfs``) is
|
||||
"CX_ACPI", where X is the index of that idle state in the final list (note that
|
||||
the minimum value of X is 1, because 0 is reserved for the "polling" state), and
|
||||
its target residency is based on the exit latency value. Specifically, for
|
||||
C1-type idle states the exit latency value is also used as the target residency
|
||||
(for compatibility with the majority of the "internal" tables of idle states for
|
||||
various processor models recognized by ``intel_idle``) and for the other idle
|
||||
state types (C2 and C3) the target residency value is 3 times the exit latency
|
||||
(again, that is because it reflects the target residency to exit latency ratio
|
||||
in the majority of cases for the processor models recognized by ``intel_idle``).
|
||||
All of the idle states in the final list are enabled by default in this case.
|
||||
|
||||
|
||||
.. _intel-idle-initialization:
|
||||
|
||||
Initialization
|
||||
==============
|
||||
|
||||
The initialization of ``intel_idle`` starts with checking if the kernel command
|
||||
line options forbid the use of the ``MWAIT`` instruction. If that is the case,
|
||||
an error code is returned right away.
|
||||
|
||||
The next step is to check whether or not the processor model is known to the
|
||||
driver, which determines the idle states enumeration method (see
|
||||
`above <intel-idle-enumeration-of-states_>`_), and whether or not the processor
|
||||
supports ``MWAIT`` (the initialization fails if that is not the case). Then,
|
||||
the ``MWAIT`` support in the processor is enumerated through ``CPUID`` and the
|
||||
driver initialization fails if the level of support is not as expected (for
|
||||
example, if the total number of ``MWAIT`` substates returned is 0).
|
||||
|
||||
Next, if the driver is not configured to ignore the ACPI tables (see
|
||||
`below <intel-idle-parameters_>`_), the idle states information provided by the
|
||||
platform firmware is extracted from them.
|
||||
|
||||
Then, ``CPUIdle`` device objects are allocated for all CPUs and the list of
|
||||
available idle states is created as explained
|
||||
`above <intel-idle-enumeration-of-states_>`_.
|
||||
|
||||
Finally, ``intel_idle`` is registered with the help of cpuidle_register_driver()
|
||||
as the ``CPUIdle`` driver for all CPUs in the system and a CPU online callback
|
||||
for configuring individual CPUs is registered via cpuhp_setup_state(), which
|
||||
(among other things) causes the callback routine to be invoked for all of the
|
||||
CPUs present in the system at that time (each CPU executes its own instance of
|
||||
the callback routine). That routine registers a ``CPUIdle`` device for the CPU
|
||||
running it (which enables the ``CPUIdle`` subsystem to operate that CPU) and
|
||||
optionally performs some CPU-specific initialization actions that may be
|
||||
required for the given processor model.
|
||||
|
||||
|
||||
.. _intel-idle-parameters:
|
||||
|
||||
Kernel Command Line Options and Module Parameters
|
||||
=================================================
|
||||
|
||||
The *x86* architecture support code recognizes three kernel command line
|
||||
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
|
||||
and ``idle=nomwait``. If any of them is present in the kernel command line, the
|
||||
``MWAIT`` instruction is not allowed to be used, so the initialization of
|
||||
``intel_idle`` will fail.
|
||||
|
||||
Apart from that there are two module parameters recognized by ``intel_idle``
|
||||
itself that can be set via the kernel command line (they cannot be updated via
|
||||
sysfs, so that is the only way to change their values).
|
||||
|
||||
The ``max_cstate`` parameter value is the maximum idle state index in the list
|
||||
of idle states supplied to the ``CPUIdle`` core during the registration of the
|
||||
driver. It is also the maximum number of regular (non-polling) idle states that
|
||||
can be used by ``intel_idle``, so the enumeration of idle states is terminated
|
||||
after finding that number of usable idle states (the other idle states that
|
||||
potentially might have been used if ``max_cstate`` had been greater are not
|
||||
taken into consideration at all). Setting ``max_cstate`` can prevent
|
||||
``intel_idle`` from exposing idle states that are regarded as "too deep" for
|
||||
some reason to the ``CPUIdle`` core, but it does so by making them effectively
|
||||
invisible until the system is shut down and started again which may not always
|
||||
be desirable. In practice, it is only really necessary to do that if the idle
|
||||
states in question cannot be enabled during system startup, because in the
|
||||
working state of the system the CPU power management quality of service (PM
|
||||
QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states
|
||||
even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`).
|
||||
Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail.
|
||||
|
||||
The ``noacpi`` module parameter (which is recognized by ``intel_idle`` if the
|
||||
kernel has been configured with ACPI support), can be set to make the driver
|
||||
ignore the system's ACPI tables entirely (it is unset by default).
|
||||
|
||||
|
||||
.. _intel-idle-core-and-package-idle-states:
|
||||
|
||||
Core and Package Levels of Idle States
|
||||
======================================
|
||||
|
||||
Typically, in a processor supporting the ``MWAIT`` instruction there are (at
|
||||
least) two levels of idle states (or C-states). One level, referred to as
|
||||
"core C-states", covers individual cores in the processor, whereas the other
|
||||
level, referred to as "package C-states", covers the entire processor package
|
||||
and it may also involve other components of the system (GPUs, memory
|
||||
controllers, I/O hubs etc.).
|
||||
|
||||
Some of the ``MWAIT`` hint values allow the processor to use core C-states only
|
||||
(most importantly, that is the case for the ``MWAIT`` hint value corresponding
|
||||
to the ``C1`` idle state), but the majority of them give it a license to put
|
||||
the target core (i.e. the core containing the logical CPU executing ``MWAIT``
|
||||
with the given hint value) into a specific core C-state and then (if possible)
|
||||
to enter a specific package C-state at the deeper level. For example, the
|
||||
``MWAIT`` hint value representing the ``C3`` idle state allows the processor to
|
||||
put the target core into the low-power state referred to as "core ``C3``" (or
|
||||
``CC3``), which happens if all of the logical CPUs (SMT siblings) in that core
|
||||
have executed ``MWAIT`` with the ``C3`` hint value (or with a hint value
|
||||
representing a deeper idle state), and in addition to that (in the majority of
|
||||
cases) it gives the processor a license to put the entire package (possibly
|
||||
including some non-CPU components such as a GPU or a memory controller) into the
|
||||
low-power state referred to as "package ``C3``" (or ``PC3``), which happens if
|
||||
all of the cores have gone into the ``CC3`` state and (possibly) some additional
|
||||
conditions are satisfied (for instance, if the GPU is covered by ``PC3``, it may
|
||||
be required to be in a certain GPU-specific low-power state for ``PC3`` to be
|
||||
reachable).
|
||||
|
||||
As a rule, there is no simple way to make the processor use core C-states only
|
||||
if the conditions for entering the corresponding package C-states are met, so
|
||||
the logical CPU executing ``MWAIT`` with a hint value that is not core-level
|
||||
only (like for ``C1``) must always assume that this may cause the processor to
|
||||
enter a package C-state. [That is why the exit latency and target residency
|
||||
values corresponding to the majority of ``MWAIT`` hint values in the "internal"
|
||||
tables of idle states in ``intel_idle`` reflect the properties of package
|
||||
C-states.] If using package C-states is not desirable at all, either
|
||||
:ref:`PM QoS <cpu-pm-qos>` or the ``max_cstate`` module parameter of
|
||||
``intel_idle`` described `above <intel-idle-parameters_>`_ must be used to
|
||||
restrict the range of permissible idle states to the ones with core-level only
|
||||
``MWAIT`` hint values (like ``C1``).
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
.. [1] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2B*,
|
||||
https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2b-manual.html
|
||||
|
||||
.. [2] *Advanced Configuration and Power Interface (ACPI) Specification*,
|
||||
https://uefi.org/specifications
|
|
@ -8,6 +8,7 @@ Working-State Power Management
|
|||
:maxdepth: 2
|
||||
|
||||
cpuidle
|
||||
intel_idle
|
||||
cpufreq
|
||||
intel_pstate
|
||||
intel_epb
|
||||
|
|
|
@ -52,6 +52,9 @@ stable kernels.
|
|||
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | Altra | #82288 | ALTRA_ERRATUM_82288 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
|
||||
|
@ -88,6 +91,8 @@ stable kernels.
|
|||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N1 | #1349291 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | MMU-500 | #841119,826419 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
Assembler Annotations
|
||||
=====================
|
||||
|
||||
Copyright (c) 2017-2019 Jiri Slaby
|
||||
|
||||
This document describes the new macros for annotation of data and code in
|
||||
assembly. In particular, it contains information about ``SYM_FUNC_START``,
|
||||
``SYM_FUNC_END``, ``SYM_CODE_START``, and similar.
|
||||
|
||||
Rationale
|
||||
---------
|
||||
Some code like entries, trampolines, or boot code needs to be written in
|
||||
assembly. The same as in C, such code is grouped into functions and
|
||||
accompanied with data. Standard assemblers do not force users into precisely
|
||||
marking these pieces as code, data, or even specifying their length.
|
||||
Nevertheless, assemblers provide developers with such annotations to aid
|
||||
debuggers throughout assembly. On top of that, developers also want to mark
|
||||
some functions as *global* in order to be visible outside of their translation
|
||||
units.
|
||||
|
||||
Over time, the Linux kernel has adopted macros from various projects (like
|
||||
``binutils``) to facilitate such annotations. So for historic reasons,
|
||||
developers have been using ``ENTRY``, ``END``, ``ENDPROC``, and other
|
||||
annotations in assembly. Due to the lack of their documentation, the macros
|
||||
are used in rather wrong contexts at some locations. Clearly, ``ENTRY`` was
|
||||
intended to denote the beginning of global symbols (be it data or code).
|
||||
``END`` used to mark the end of data or end of special functions with
|
||||
*non-standard* calling convention. In contrast, ``ENDPROC`` should annotate
|
||||
only ends of *standard* functions.
|
||||
|
||||
When these macros are used correctly, they help assemblers generate a nice
|
||||
object with both sizes and types set correctly. For example, the result of
|
||||
``arch/x86/lib/putuser.S``::
|
||||
|
||||
Num: Value Size Type Bind Vis Ndx Name
|
||||
25: 0000000000000000 33 FUNC GLOBAL DEFAULT 1 __put_user_1
|
||||
29: 0000000000000030 37 FUNC GLOBAL DEFAULT 1 __put_user_2
|
||||
32: 0000000000000060 36 FUNC GLOBAL DEFAULT 1 __put_user_4
|
||||
35: 0000000000000090 37 FUNC GLOBAL DEFAULT 1 __put_user_8
|
||||
|
||||
This is not only important for debugging purposes. When there are properly
|
||||
annotated objects like this, tools can be run on them to generate more useful
|
||||
information. In particular, on properly annotated objects, ``objtool`` can be
|
||||
run to check and fix the object if needed. Currently, ``objtool`` can report
|
||||
missing frame pointer setup/destruction in functions. It can also
|
||||
automatically generate annotations for :doc:`ORC unwinder <x86/orc-unwinder>`
|
||||
for most code. Both of these are especially important to support reliable
|
||||
stack traces which are in turn necessary for :doc:`Kernel live patching
|
||||
<livepatch/livepatch>`.
|
||||
|
||||
Caveat and Discussion
|
||||
---------------------
|
||||
As one might realize, there were only three macros previously. That is indeed
|
||||
insufficient to cover all the combinations of cases:
|
||||
|
||||
* standard/non-standard function
|
||||
* code/data
|
||||
* global/local symbol
|
||||
|
||||
There was a discussion_ and instead of extending the current ``ENTRY/END*``
|
||||
macros, it was decided that brand new macros should be introduced instead::
|
||||
|
||||
So how about using macro names that actually show the purpose, instead
|
||||
of importing all the crappy, historic, essentially randomly chosen
|
||||
debug symbol macro names from the binutils and older kernels?
|
||||
|
||||
.. _discussion: https://lkml.kernel.org/r/20170217104757.28588-1-jslaby@suse.cz
|
||||
|
||||
Macros Description
|
||||
------------------
|
||||
|
||||
The new macros are prefixed with the ``SYM_`` prefix and can be divided into
|
||||
three main groups:
|
||||
|
||||
1. ``SYM_FUNC_*`` -- to annotate C-like functions. This means functions with
|
||||
standard C calling conventions, i.e. the stack contains a return address at
|
||||
the predefined place and a return from the function can happen in a
|
||||
standard way. When frame pointers are enabled, save/restore of frame
|
||||
pointer shall happen at the start/end of a function, respectively, too.
|
||||
|
||||
Checking tools like ``objtool`` should ensure such marked functions conform
|
||||
to these rules. The tools can also easily annotate these functions with
|
||||
debugging information (like *ORC data*) automatically.
|
||||
|
||||
2. ``SYM_CODE_*`` -- special functions called with special stack. Be it
|
||||
interrupt handlers with special stack content, trampolines, or startup
|
||||
functions.
|
||||
|
||||
Checking tools mostly ignore checking of these functions. But some debug
|
||||
information still can be generated automatically. For correct debug data,
|
||||
this code needs hints like ``UNWIND_HINT_REGS`` provided by developers.
|
||||
|
||||
3. ``SYM_DATA*`` -- obviously data belonging to ``.data`` sections and not to
|
||||
``.text``. Data do not contain instructions, so they have to be treated
|
||||
specially by the tools: they should not treat the bytes as instructions,
|
||||
nor assign any debug information to them.
|
||||
|
||||
Instruction Macros
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above.
|
||||
|
||||
* ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the
|
||||
most frequent markings**. They are used for functions with standard calling
|
||||
conventions -- global and local. Like in C, they both align the functions to
|
||||
architecture specific ``__ALIGN`` bytes. There are also ``_NOALIGN`` variants
|
||||
for special cases where developers do not want this implicit alignment.
|
||||
|
||||
``SYM_FUNC_START_WEAK`` and ``SYM_FUNC_START_WEAK_NOALIGN`` markings are
|
||||
also offered as an assembler counterpart to the *weak* attribute known from
|
||||
C.
|
||||
|
||||
All of these **shall** be coupled with ``SYM_FUNC_END``. First, it marks
|
||||
the sequence of instructions as a function and computes its size to the
|
||||
generated object file. Second, it also eases checking and processing such
|
||||
object files as the tools can trivially find exact function boundaries.
|
||||
|
||||
So in most cases, developers should write something like in the following
|
||||
example, having some asm instructions in between the macros, of course::
|
||||
|
||||
SYM_FUNC_START(function_hook)
|
||||
... asm insns ...
|
||||
SYM_FUNC_END(function_hook)
|
||||
|
||||
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
|
||||
and ``ENDPROC`` macros.
|
||||
|
||||
* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
|
||||
who decided to have two or more names for one function. The typical use is::
|
||||
|
||||
SYM_FUNC_START_ALIAS(__memset)
|
||||
SYM_FUNC_START(memset)
|
||||
... asm insns ...
|
||||
SYM_FUNC_END(memset)
|
||||
SYM_FUNC_END_ALIAS(__memset)
|
||||
|
||||
In this example, one can call ``__memset`` or ``memset`` with the same
|
||||
result, except the debug information for the instructions is generated to
|
||||
the object file only once -- for the non-``ALIAS`` case.
|
||||
|
||||
* ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in
|
||||
special cases -- if you know what you are doing. This is used exclusively
|
||||
for interrupt handlers and similar where the calling convention is not the C
|
||||
one. ``_NOALIGN`` variants exist too. The use is the same as for the ``FUNC``
|
||||
category above::
|
||||
|
||||
SYM_CODE_START_LOCAL(bad_put_user)
|
||||
... asm insns ...
|
||||
SYM_CODE_END(bad_put_user)
|
||||
|
||||
Again, every ``SYM_CODE_START*`` **shall** be coupled by ``SYM_CODE_END``.
|
||||
|
||||
To some extent, this category corresponds to deprecated ``ENTRY`` and
|
||||
``END``. Except ``END`` had several other meanings too.
|
||||
|
||||
* ``SYM_INNER_LABEL*`` is used to denote a label inside some
|
||||
``SYM_{CODE,FUNC}_START`` and ``SYM_{CODE,FUNC}_END``. They are very similar
|
||||
to C labels, except they can be made global. An example of use::
|
||||
|
||||
SYM_CODE_START(ftrace_caller)
|
||||
/* save_mcount_regs fills in first two parameters */
|
||||
...
|
||||
|
||||
SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
|
||||
/* Load the ftrace_ops into the 3rd parameter */
|
||||
...
|
||||
|
||||
SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
|
||||
call ftrace_stub
|
||||
...
|
||||
retq
|
||||
SYM_CODE_END(ftrace_caller)
|
||||
|
||||
Data Macros
|
||||
~~~~~~~~~~~
|
||||
Similar to instructions, there is a couple of macros to describe data in the
|
||||
assembly.
|
||||
|
||||
* ``SYM_DATA_START`` and ``SYM_DATA_START_LOCAL`` mark the start of some data
|
||||
and shall be used in conjunction with either ``SYM_DATA_END``, or
|
||||
``SYM_DATA_END_LABEL``. The latter adds also a label to the end, so that
|
||||
people can use ``lstack`` and (local) ``lstack_end`` in the following
|
||||
example::
|
||||
|
||||
SYM_DATA_START_LOCAL(lstack)
|
||||
.skip 4096
|
||||
SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end)
|
||||
|
||||
* ``SYM_DATA`` and ``SYM_DATA_LOCAL`` are variants for simple, mostly one-line
|
||||
data::
|
||||
|
||||
SYM_DATA(HEAP, .long rm_heap)
|
||||
SYM_DATA(heap_end, .long rm_stack)
|
||||
|
||||
In the end, they expand to ``SYM_DATA_START`` with ``SYM_DATA_END``
|
||||
internally.
|
||||
|
||||
Support Macros
|
||||
~~~~~~~~~~~~~~
|
||||
All the above reduce themselves to some invocation of ``SYM_START``,
|
||||
``SYM_END``, or ``SYM_ENTRY`` at last. Normally, developers should avoid using
|
||||
these.
|
||||
|
||||
Further, in the above examples, one could see ``SYM_L_LOCAL``. There are also
|
||||
``SYM_L_GLOBAL`` and ``SYM_L_WEAK``. All are intended to denote linkage of a
|
||||
symbol marked by them. They are used either in ``_LABEL`` variants of the
|
||||
earlier macros, or in ``SYM_START``.
|
||||
|
||||
|
||||
Overriding Macros
|
||||
~~~~~~~~~~~~~~~~~
|
||||
Architecture can also override any of the macros in their own
|
||||
``asm/linkage.h``, including macros specifying the type of a symbol
|
||||
(``SYM_T_FUNC``, ``SYM_T_OBJECT``, and ``SYM_T_NONE``). As every macro
|
||||
described in this file is surrounded by ``#ifdef`` + ``#endif``, it is enough
|
||||
to define the macros differently in the aforementioned architecture-dependent
|
||||
header.
|
|
@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel
|
|||
C-based implementation can export. Notable example is single VLAN (802.1Q)
|
||||
and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
|
||||
for a set of information that's currently can be exported from the BPF context.
|
||||
|
||||
When BPF flow dissector is attached to the root network namespace (machine-wide
|
||||
policy), users can't override it in their child network namespaces.
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
===============
|
||||
BPF ring buffer
|
||||
===============
|
||||
|
||||
This document describes BPF ring buffer design, API, and implementation details.
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
:depth: 2
|
||||
|
||||
Motivation
|
||||
----------
|
||||
|
||||
There are two distinctive motivators for this work, which are not satisfied by
|
||||
existing perf buffer, which prompted creation of a new ring buffer
|
||||
implementation.
|
||||
|
||||
- more efficient memory utilization by sharing ring buffer across CPUs;
|
||||
- preserving ordering of events that happen sequentially in time, even across
|
||||
multiple CPUs (e.g., fork/exec/exit events for a task).
|
||||
|
||||
These two problems are independent, but perf buffer fails to satisfy both.
|
||||
Both are a result of a choice to have per-CPU perf ring buffer. Both can be
|
||||
also solved by having an MPSC implementation of ring buffer. The ordering
|
||||
problem could technically be solved for perf buffer with some in-kernel
|
||||
counting, but given the first one requires an MPSC buffer, the same solution
|
||||
would solve the second problem automatically.
|
||||
|
||||
Semantics and APIs
|
||||
------------------
|
||||
|
||||
Single ring buffer is presented to BPF programs as an instance of BPF map of
|
||||
type ``BPF_MAP_TYPE_RINGBUF``. Two other alternatives considered, but
|
||||
ultimately rejected.
|
||||
|
||||
One way would be to, similar to ``BPF_MAP_TYPE_PERF_EVENT_ARRAY``, make
|
||||
``BPF_MAP_TYPE_RINGBUF`` could represent an array of ring buffers, but not
|
||||
enforce "same CPU only" rule. This would be more familiar interface compatible
|
||||
with existing perf buffer use in BPF, but would fail if application needed more
|
||||
advanced logic to lookup ring buffer by arbitrary key.
|
||||
``BPF_MAP_TYPE_HASH_OF_MAPS`` addresses this with current approach.
|
||||
Additionally, given the performance of BPF ringbuf, many use cases would just
|
||||
opt into a simple single ring buffer shared among all CPUs, for which current
|
||||
approach would be an overkill.
|
||||
|
||||
Another approach could introduce a new concept, alongside BPF map, to represent
|
||||
generic "container" object, which doesn't necessarily have key/value interface
|
||||
with lookup/update/delete operations. This approach would add a lot of extra
|
||||
infrastructure that has to be built for observability and verifier support. It
|
||||
would also add another concept that BPF developers would have to familiarize
|
||||
themselves with, new syntax in libbpf, etc. But then would really provide no
|
||||
additional benefits over the approach of using a map. ``BPF_MAP_TYPE_RINGBUF``
|
||||
doesn't support lookup/update/delete operations, but so doesn't few other map
|
||||
types (e.g., queue and stack; array doesn't support delete, etc).
|
||||
|
||||
The approach chosen has an advantage of re-using existing BPF map
|
||||
infrastructure (introspection APIs in kernel, libbpf support, etc), being
|
||||
familiar concept (no need to teach users a new type of object in BPF program),
|
||||
and utilizing existing tooling (bpftool). For common scenario of using a single
|
||||
ring buffer for all CPUs, it's as simple and straightforward, as would be with
|
||||
a dedicated "container" object. On the other hand, by being a map, it can be
|
||||
combined with ``ARRAY_OF_MAPS`` and ``HASH_OF_MAPS`` map-in-maps to implement
|
||||
a wide variety of topologies, from one ring buffer for each CPU (e.g., as
|
||||
a replacement for perf buffer use cases), to a complicated application
|
||||
hashing/sharding of ring buffers (e.g., having a small pool of ring buffers
|
||||
with hashed task's tgid being a look up key to preserve order, but reduce
|
||||
contention).
|
||||
|
||||
Key and value sizes are enforced to be zero. ``max_entries`` is used to specify
|
||||
the size of ring buffer and has to be a power of 2 value.
|
||||
|
||||
There are a bunch of similarities between perf buffer
|
||||
(``BPF_MAP_TYPE_PERF_EVENT_ARRAY``) and new BPF ring buffer semantics:
|
||||
|
||||
- variable-length records;
|
||||
- if there is no more space left in ring buffer, reservation fails, no
|
||||
blocking;
|
||||
- memory-mappable data area for user-space applications for ease of
|
||||
consumption and high performance;
|
||||
- epoll notifications for new incoming data;
|
||||
- but still the ability to do busy polling for new data to achieve the
|
||||
lowest latency, if necessary.
|
||||
|
||||
BPF ringbuf provides two sets of APIs to BPF programs:
|
||||
|
||||
- ``bpf_ringbuf_output()`` allows to *copy* data from one place to a ring
|
||||
buffer, similarly to ``bpf_perf_event_output()``;
|
||||
- ``bpf_ringbuf_reserve()``/``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()``
|
||||
APIs split the whole process into two steps. First, a fixed amount of space
|
||||
is reserved. If successful, a pointer to a data inside ring buffer data
|
||||
area is returned, which BPF programs can use similarly to a data inside
|
||||
array/hash maps. Once ready, this piece of memory is either committed or
|
||||
discarded. Discard is similar to commit, but makes consumer ignore the
|
||||
record.
|
||||
|
||||
``bpf_ringbuf_output()`` has disadvantage of incurring extra memory copy,
|
||||
because record has to be prepared in some other place first. But it allows to
|
||||
submit records of the length that's not known to verifier beforehand. It also
|
||||
closely matches ``bpf_perf_event_output()``, so will simplify migration
|
||||
significantly.
|
||||
|
||||
``bpf_ringbuf_reserve()`` avoids the extra copy of memory by providing a memory
|
||||
pointer directly to ring buffer memory. In a lot of cases records are larger
|
||||
than BPF stack space allows, so many programs have use extra per-CPU array as
|
||||
a temporary heap for preparing sample. bpf_ringbuf_reserve() avoid this needs
|
||||
completely. But in exchange, it only allows a known constant size of memory to
|
||||
be reserved, such that verifier can verify that BPF program can't access memory
|
||||
outside its reserved record space. bpf_ringbuf_output(), while slightly slower
|
||||
due to extra memory copy, covers some use cases that are not suitable for
|
||||
``bpf_ringbuf_reserve()``.
|
||||
|
||||
The difference between commit and discard is very small. Discard just marks
|
||||
a record as discarded, and such records are supposed to be ignored by consumer
|
||||
code. Discard is useful for some advanced use-cases, such as ensuring
|
||||
all-or-nothing multi-record submission, or emulating temporary
|
||||
``malloc()``/``free()`` within single BPF program invocation.
|
||||
|
||||
Each reserved record is tracked by verifier through existing
|
||||
reference-tracking logic, similar to socket ref-tracking. It is thus
|
||||
impossible to reserve a record, but forget to submit (or discard) it.
|
||||
|
||||
``bpf_ringbuf_query()`` helper allows to query various properties of ring
|
||||
buffer. Currently 4 are supported:
|
||||
|
||||
- ``BPF_RB_AVAIL_DATA`` returns amount of unconsumed data in ring buffer;
|
||||
- ``BPF_RB_RING_SIZE`` returns the size of ring buffer;
|
||||
- ``BPF_RB_CONS_POS``/``BPF_RB_PROD_POS`` returns current logical possition
|
||||
of consumer/producer, respectively.
|
||||
|
||||
Returned values are momentarily snapshots of ring buffer state and could be
|
||||
off by the time helper returns, so this should be used only for
|
||||
debugging/reporting reasons or for implementing various heuristics, that take
|
||||
into account highly-changeable nature of some of those characteristics.
|
||||
|
||||
One such heuristic might involve more fine-grained control over poll/epoll
|
||||
notifications about new data availability in ring buffer. Together with
|
||||
``BPF_RB_NO_WAKEUP``/``BPF_RB_FORCE_WAKEUP`` flags for output/commit/discard
|
||||
helpers, it allows BPF program a high degree of control and, e.g., more
|
||||
efficient batched notifications. Default self-balancing strategy, though,
|
||||
should be adequate for most applications and will work reliable and efficiently
|
||||
already.
|
||||
|
||||
Design and Implementation
|
||||
-------------------------
|
||||
|
||||
This reserve/commit schema allows a natural way for multiple producers, either
|
||||
on different CPUs or even on the same CPU/in the same BPF program, to reserve
|
||||
independent records and work with them without blocking other producers. This
|
||||
means that if BPF program was interruped by another BPF program sharing the
|
||||
same ring buffer, they will both get a record reserved (provided there is
|
||||
enough space left) and can work with it and submit it independently. This
|
||||
applies to NMI context as well, except that due to using a spinlock during
|
||||
reservation, in NMI context, ``bpf_ringbuf_reserve()`` might fail to get
|
||||
a lock, in which case reservation will fail even if ring buffer is not full.
|
||||
|
||||
The ring buffer itself internally is implemented as a power-of-2 sized
|
||||
circular buffer, with two logical and ever-increasing counters (which might
|
||||
wrap around on 32-bit architectures, that's not a problem):
|
||||
|
||||
- consumer counter shows up to which logical position consumer consumed the
|
||||
data;
|
||||
- producer counter denotes amount of data reserved by all producers.
|
||||
|
||||
Each time a record is reserved, producer that "owns" the record will
|
||||
successfully advance producer counter. At that point, data is still not yet
|
||||
ready to be consumed, though. Each record has 8 byte header, which contains the
|
||||
length of reserved record, as well as two extra bits: busy bit to denote that
|
||||
record is still being worked on, and discard bit, which might be set at commit
|
||||
time if record is discarded. In the latter case, consumer is supposed to skip
|
||||
the record and move on to the next one. Record header also encodes record's
|
||||
relative offset from the beginning of ring buffer data area (in pages). This
|
||||
allows ``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()`` to accept only the
|
||||
pointer to the record itself, without requiring also the pointer to ring buffer
|
||||
itself. Ring buffer memory location will be restored from record metadata
|
||||
header. This significantly simplifies verifier, as well as improving API
|
||||
usability.
|
||||
|
||||
Producer counter increments are serialized under spinlock, so there is
|
||||
a strict ordering between reservations. Commits, on the other hand, are
|
||||
completely lockless and independent. All records become available to consumer
|
||||
in the order of reservations, but only after all previous records where
|
||||
already committed. It is thus possible for slow producers to temporarily hold
|
||||
off submitted records, that were reserved later.
|
||||
|
||||
Reservation/commit/consumer protocol is verified by litmus tests in
|
||||
Documentation/litmus_tests/bpf-rb/_.
|
||||
|
||||
One interesting implementation bit, that significantly simplifies (and thus
|
||||
speeds up as well) implementation of both producers and consumers is how data
|
||||
area is mapped twice contiguously back-to-back in the virtual memory. This
|
||||
allows to not take any special measures for samples that have to wrap around
|
||||
at the end of the circular buffer data area, because the next page after the
|
||||
last data page would be first data page again, and thus the sample will still
|
||||
appear completely contiguous in virtual memory. See comment and a simple ASCII
|
||||
diagram showing this visually in ``bpf_ringbuf_area_alloc()``.
|
||||
|
||||
Another feature that distinguishes BPF ringbuf from perf ring buffer is
|
||||
a self-pacing notifications of new data being availability.
|
||||
``bpf_ringbuf_commit()`` implementation will send a notification of new record
|
||||
being available after commit only if consumer has already caught up right up to
|
||||
the record being committed. If not, consumer still has to catch up and thus
|
||||
will see new data anyways without needing an extra poll notification.
|
||||
Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c_) show that
|
||||
this allows to achieve a very high throughput without having to resort to
|
||||
tricks like "notify only every Nth sample", which are necessary with perf
|
||||
buffer. For extreme cases, when BPF program wants more manual control of
|
||||
notifications, commit/discard/output helpers accept ``BPF_RB_NO_WAKEUP`` and
|
||||
``BPF_RB_FORCE_WAKEUP`` flags, which give full control over notifications of
|
||||
data availability, but require extra caution and diligence in using this API.
|
|
@ -16,6 +16,9 @@ Required properties:
|
|||
Documentation/devicetree/bindings/graph.txt. This port should be connected
|
||||
to the input port of an attached HDMI or LVDS encoder chip.
|
||||
|
||||
Optional properties:
|
||||
- pinctrl-names: Contain "default" and "sleep".
|
||||
|
||||
Example:
|
||||
|
||||
dpi0: dpi@1401d000 {
|
||||
|
@ -26,6 +29,9 @@ dpi0: dpi@1401d000 {
|
|||
<&mmsys CLK_MM_DPI_ENGINE>,
|
||||
<&apmixedsys CLK_APMIXED_TVDPLL>;
|
||||
clock-names = "pixel", "engine", "pll";
|
||||
pinctrl-names = "default", "sleep";
|
||||
pinctrl-0 = <&dpi_pin_func>;
|
||||
pinctrl-1 = <&dpi_pin_idle>;
|
||||
|
||||
port {
|
||||
dpi0_out: endpoint {
|
||||
|
|
|
@ -20,8 +20,9 @@ Required properties:
|
|||
- gpio-controller : Marks the device node as a GPIO controller
|
||||
- interrupts : Interrupt specifier, see interrupt-controller/interrupts.txt
|
||||
- interrupt-controller : Mark the GPIO controller as an interrupt-controller
|
||||
- ngpios : number of GPIO lines, see gpio.txt
|
||||
(should be multiple of 8, up to 80 pins)
|
||||
- ngpios : number of *hardware* GPIO lines, see gpio.txt. This will expose
|
||||
2 software GPIOs per hardware GPIO: one for hardware input, one for hardware
|
||||
output. Up to 80 pins, must be a multiple of 8.
|
||||
- clocks : A phandle to the APB clock for SGPM clock division
|
||||
- bus-frequency : SGPM CLK frequency
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ controller state. The mux controller state is described in
|
|||
|
||||
Example:
|
||||
mux: mux-controller {
|
||||
compatible = "mux-gpio";
|
||||
compatible = "gpio-mux";
|
||||
#mux-control-cells = <0>;
|
||||
|
||||
mux-gpios = <&pioA 0 GPIO_ACTIVE_HIGH>,
|
||||
|
|
|
@ -87,7 +87,7 @@ Example:
|
|||
ranges;
|
||||
|
||||
/* APU<->RPU0 IPI mailbox controller */
|
||||
ipi_mailbox_rpu0: mailbox@ff90400 {
|
||||
ipi_mailbox_rpu0: mailbox@ff990400 {
|
||||
reg = <0xff990400 0x20>,
|
||||
<0xff990420 0x20>,
|
||||
<0xff990080 0x20>,
|
||||
|
|
|
@ -49,6 +49,8 @@ Optional properties:
|
|||
error caused by stop clock(fifo full)
|
||||
Valid range = [0:0x7]. if not present, default value is 0.
|
||||
applied to compatible "mediatek,mt2701-mmc".
|
||||
- resets: Phandle and reset specifier pair to softreset line of MSDC IP.
|
||||
- reset-names: Should be "hrst".
|
||||
|
||||
Examples:
|
||||
mmc0: mmc@11230000 {
|
||||
|
|
|
@ -15,8 +15,15 @@ Required properties:
|
|||
- "nvidia,tegra210-sdhci": for Tegra210
|
||||
- "nvidia,tegra186-sdhci": for Tegra186
|
||||
- "nvidia,tegra194-sdhci": for Tegra194
|
||||
- clocks : Must contain one entry, for the module clock.
|
||||
See ../clocks/clock-bindings.txt for details.
|
||||
- clocks: For Tegra210, Tegra186 and Tegra194 must contain two entries.
|
||||
One for the module clock and one for the timeout clock.
|
||||
For all other Tegra devices, must contain a single entry for
|
||||
the module clock. See ../clocks/clock-bindings.txt for details.
|
||||
- clock-names: For Tegra210, Tegra186 and Tegra194 must contain the
|
||||
strings 'sdhci' and 'tmclk' to represent the module and
|
||||
the timeout clocks, respectively.
|
||||
For all other Tegra devices must contain the string 'sdhci'
|
||||
to represent the module clock.
|
||||
- resets : Must contain an entry for each entry in reset-names.
|
||||
See ../reset/reset.txt for details.
|
||||
- reset-names : Must include the following entries:
|
||||
|
@ -99,7 +106,7 @@ Optional properties for Tegra210, Tegra186 and Tegra194:
|
|||
|
||||
Example:
|
||||
sdhci@700b0000 {
|
||||
compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
|
||||
compatible = "nvidia,tegra124-sdhci";
|
||||
reg = <0x0 0x700b0000 0x0 0x200>;
|
||||
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
|
||||
|
@ -115,3 +122,22 @@ sdhci@700b0000 {
|
|||
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
sdhci@700b0000 {
|
||||
compatible = "nvidia,tegra210-sdhci";
|
||||
reg = <0x0 0x700b0000 0x0 0x200>;
|
||||
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&tegra_car TEGRA210_CLK_SDMMC1>,
|
||||
<&tegra_car TEGRA210_CLK_SDMMC_LEGACY>;
|
||||
clock-names = "sdhci", "tmclk";
|
||||
resets = <&tegra_car 14>;
|
||||
reset-names = "sdhci";
|
||||
pinctrl-names = "sdmmc-3v3", "sdmmc-1v8";
|
||||
pinctrl-0 = <&sdmmc1_3v3>;
|
||||
pinctrl-1 = <&sdmmc1_1v8>;
|
||||
nvidia,pad-autocal-pull-up-offset-3v3 = <0x00>;
|
||||
nvidia,pad-autocal-pull-down-offset-3v3 = <0x7d>;
|
||||
nvidia,pad-autocal-pull-up-offset-1v8 = <0x7b>;
|
||||
nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>;
|
||||
status = "disabled";
|
||||
};
|
||||
|
|
|
@ -38,7 +38,7 @@ Following example uses irq pin number 3 of gpio0 for out of band wake-on-bt:
|
|||
compatible = "usb1286,204e";
|
||||
reg = <1>;
|
||||
interrupt-parent = <&gpio0>;
|
||||
interrupt-name = "wakeup";
|
||||
interrupt-names = "wakeup";
|
||||
interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -33,7 +33,7 @@ tcan4x5x: tcan4x5x@0 {
|
|||
spi-max-frequency = <10000000>;
|
||||
bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
|
||||
interrupt-parent = <&gpio1>;
|
||||
interrupts = <14 GPIO_ACTIVE_LOW>;
|
||||
interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
|
||||
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
|
||||
device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
|
||||
reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
|
||||
|
|
|
@ -51,7 +51,7 @@ properties:
|
|||
description:
|
||||
Reference to an nvmem node for the MAC address
|
||||
|
||||
nvmem-cells-names:
|
||||
nvmem-cell-names:
|
||||
const: mac-address
|
||||
|
||||
phy-connection-type:
|
||||
|
@ -190,6 +190,11 @@ properties:
|
|||
Indicates that full-duplex is used. When absent, half
|
||||
duplex is assumed.
|
||||
|
||||
pause:
|
||||
$ref: /schemas/types.yaml#definitions/flag
|
||||
description:
|
||||
Indicates that pause should be enabled.
|
||||
|
||||
asym-pause:
|
||||
$ref: /schemas/types.yaml#definitions/flag
|
||||
description:
|
||||
|
|
|
@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2):
|
|||
clock-frequency = <100000>;
|
||||
|
||||
interrupt-parent = <&gpio1>;
|
||||
interrupts = <29 GPIO_ACTIVE_HIGH>;
|
||||
interrupts = <29 IRQ_TYPE_LEVEL_HIGH>;
|
||||
|
||||
enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>;
|
||||
firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>;
|
||||
|
|
|
@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with PN544 on I2C2):
|
|||
clock-frequency = <400000>;
|
||||
|
||||
interrupt-parent = <&gpio1>;
|
||||
interrupts = <17 GPIO_ACTIVE_HIGH>;
|
||||
interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
|
||||
|
||||
enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
|
||||
firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
|
||||
|
|
|
@ -118,7 +118,7 @@ Tegra194:
|
|||
--------
|
||||
|
||||
pcie@14180000 {
|
||||
compatible = "nvidia,tegra194-pcie", "snps,dw-pcie";
|
||||
compatible = "nvidia,tegra194-pcie";
|
||||
power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>;
|
||||
reg = <0x00 0x14180000 0x0 0x00020000 /* appl registers (128K) */
|
||||
0x00 0x38000000 0x0 0x00040000 /* configuration space (256K) */
|
||||
|
|
|
@ -14,9 +14,15 @@ Required properties:
|
|||
- #gpio-cells : Must be 2. The first cell is the pin number and the
|
||||
second cell is used to specify optional parameters (currently unused).
|
||||
|
||||
- AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply,
|
||||
SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered
|
||||
in Documentation/devicetree/bindings/regulator/regulator.txt
|
||||
- power supplies for the device, as covered in
|
||||
Documentation/devicetree/bindings/regulator/regulator.txt, depending
|
||||
on compatible:
|
||||
- for wlf,wm1811 and wlf,wm8958:
|
||||
AVDD1-supply, AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply,
|
||||
DCVDD-supply, CPVDD-supply, SPKVDD1-supply, SPKVDD2-supply
|
||||
- for wlf,wm8994:
|
||||
AVDD1-supply, AVDD2-supply, DBVDD-supply, DCVDD-supply, CPVDD-supply,
|
||||
SPKVDD1-supply, SPKVDD2-supply
|
||||
|
||||
Optional properties:
|
||||
|
||||
|
@ -73,11 +79,11 @@ wm8994: codec@1a {
|
|||
|
||||
lineout1-se;
|
||||
|
||||
AVDD1-supply = <®ulator>;
|
||||
AVDD2-supply = <®ulator>;
|
||||
CPVDD-supply = <®ulator>;
|
||||
DBVDD1-supply = <®ulator>;
|
||||
DBVDD2-supply = <®ulator>;
|
||||
DBVDD3-supply = <®ulator>;
|
||||
DBVDD-supply = <®ulator>;
|
||||
DCVDD-supply = <®ulator>;
|
||||
SPKVDD1-supply = <®ulator>;
|
||||
SPKVDD2-supply = <®ulator>;
|
||||
};
|
||||
|
|
|
@ -75,6 +75,8 @@ Optional properties:
|
|||
from P0 to P1/P2/P3 without delay.
|
||||
- snps,dis-tx-ipgap-linecheck-quirk: when set, disable u2mac linestate check
|
||||
during HS transmit.
|
||||
- snps,parkmode-disable-ss-quirk: when set, all SuperSpeed bus instances in
|
||||
park mode are disabled.
|
||||
- snps,dis_metastability_quirk: when set, disable metastability workaround.
|
||||
CAUTION: use only if you are absolutely sure of it.
|
||||
- snps,is-utmi-l1-suspend: true when DWC3 asserts output signal
|
||||
|
|
|
@ -250,7 +250,7 @@ High-level taskfile hooks
|
|||
|
||||
::
|
||||
|
||||
void (*qc_prep) (struct ata_queued_cmd *qc);
|
||||
enum ata_completion_errors (*qc_prep) (struct ata_queued_cmd *qc);
|
||||
int (*qc_issue) (struct ata_queued_cmd *qc);
|
||||
|
||||
|
||||
|
|
|
@ -93,13 +93,15 @@ The Amiga protection flags RWEDRWEDHSPARWED are handled as follows:
|
|||
|
||||
- R maps to r for user, group and others. On directories, R implies x.
|
||||
|
||||
- If both W and D are allowed, w will be set.
|
||||
- W maps to w.
|
||||
|
||||
- E maps to x.
|
||||
|
||||
- H and P are always retained and ignored under Linux.
|
||||
- D is ignored.
|
||||
|
||||
- A is always reset when a file is written to.
|
||||
- H, S and P are always retained and ignored under Linux.
|
||||
|
||||
- A is cleared when a file is written to.
|
||||
|
||||
User id and group id will be used unless set[gu]id are given as mount
|
||||
options. Since most of the Amiga file systems are single user systems
|
||||
|
@ -111,11 +113,13 @@ Linux -> Amiga:
|
|||
|
||||
The Linux rwxrwxrwx file mode is handled as follows:
|
||||
|
||||
- r permission will set R for user, group and others.
|
||||
- r permission will allow R for user, group and others.
|
||||
|
||||
- w permission will set W and D for user, group and others.
|
||||
- w permission will allow W for user, group and others.
|
||||
|
||||
- x permission of the user will set E for plain files.
|
||||
- x permission of the user will allow E for plain files.
|
||||
|
||||
- D will be allowed for user, group and others.
|
||||
|
||||
- All other flags (suid, sgid, ...) are ignored and will
|
||||
not be retained.
|
||||
|
|
|
@ -192,6 +192,12 @@ between the calls to start() and stop(), so holding a lock during that time
|
|||
is a reasonable thing to do. The seq_file code will also avoid taking any
|
||||
other locks while the iterator is active.
|
||||
|
||||
The iterater value returned by start() or next() is guaranteed to be
|
||||
passed to a subsequent next() or stop() call. This allows resources
|
||||
such as locks that were taken to be reliably released. There is *no*
|
||||
guarantee that the iterator will be passed to show(), though in practice
|
||||
it often will be.
|
||||
|
||||
|
||||
Formatted output
|
||||
|
||||
|
|
|
@ -232,12 +232,10 @@ Other notes:
|
|||
is 4096.
|
||||
|
||||
- show() methods should return the number of bytes printed into the
|
||||
buffer. This is the return value of scnprintf().
|
||||
buffer.
|
||||
|
||||
- show() must not use snprintf() when formatting the value to be
|
||||
returned to user space. If you can guarantee that an overflow
|
||||
will never happen you can use sprintf() otherwise you must use
|
||||
scnprintf().
|
||||
- show() should only use sysfs_emit() or sysfs_emit_at() when formatting
|
||||
the value to be returned to user space.
|
||||
|
||||
- store() should return the number of bytes used from the buffer. If the
|
||||
entire buffer has been used, just return the count argument.
|
||||
|
|
|
@ -135,6 +135,14 @@ needed).
|
|||
mic/index
|
||||
scheduler/index
|
||||
|
||||
Architecture-agnostic documentation
|
||||
-----------------------------------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
asm-annotations
|
||||
|
||||
Architecture-specific documentation
|
||||
-----------------------------------
|
||||
|
||||
|
|
|
@ -321,6 +321,7 @@ Code Seq# Include File Comments
|
|||
<mailto:tlewis@mindspring.com>
|
||||
0xA3 90-9F linux/dtlk.h
|
||||
0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
|
||||
0xA4 00-1F uapi/asm/sgx.h <mailto:linux-sgx@vger.kernel.org>
|
||||
0xAA 00-3F linux/uapi/linux/userfaultfd.h
|
||||
0xAB 00-1F linux/nbd.h
|
||||
0xAC 00-1F linux/raw.h
|
||||
|
|
|
@ -19,6 +19,7 @@ Kernel Build System
|
|||
|
||||
issues
|
||||
reproducible-builds
|
||||
llvm
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
|
|
@ -262,3 +262,8 @@ KBUILD_BUILD_USER, KBUILD_BUILD_HOST
|
|||
These two variables allow to override the user@host string displayed during
|
||||
boot and in /proc/version. The default value is the output of the commands
|
||||
whoami and host, respectively.
|
||||
|
||||
LLVM
|
||||
----
|
||||
If this variable is set to 1, Kbuild will use Clang and LLVM utilities instead
|
||||
of GCC and GNU binutils to build the kernel.
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
==============================
|
||||
Building Linux with Clang/LLVM
|
||||
==============================
|
||||
|
||||
This document covers how to build the Linux kernel with Clang and LLVM
|
||||
utilities.
|
||||
|
||||
About
|
||||
-----
|
||||
|
||||
The Linux kernel has always traditionally been compiled with GNU toolchains
|
||||
such as GCC and binutils. Ongoing work has allowed for `Clang
|
||||
<https://clang.llvm.org/>`_ and `LLVM <https://llvm.org/>`_ utilities to be
|
||||
used as viable substitutes. Distributions such as `Android
|
||||
<https://www.android.com/>`_, `ChromeOS
|
||||
<https://www.chromium.org/chromium-os>`_, and `OpenMandriva
|
||||
<https://www.openmandriva.org/>`_ use Clang built kernels. `LLVM is a
|
||||
collection of toolchain components implemented in terms of C++ objects
|
||||
<https://www.aosabook.org/en/llvm.html>`_. Clang is a front-end to LLVM that
|
||||
supports C and the GNU C extensions required by the kernel, and is pronounced
|
||||
"klang," not "see-lang."
|
||||
|
||||
Clang
|
||||
-----
|
||||
|
||||
The compiler used can be swapped out via `CC=` command line argument to `make`.
|
||||
`CC=` should be set when selecting a config and during a build.
|
||||
|
||||
make CC=clang defconfig
|
||||
|
||||
make CC=clang
|
||||
|
||||
Cross Compiling
|
||||
---------------
|
||||
|
||||
A single Clang compiler binary will typically contain all supported backends,
|
||||
which can help simplify cross compiling.
|
||||
|
||||
ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
|
||||
|
||||
`CROSS_COMPILE` is not used to prefix the Clang compiler binary, instead
|
||||
`CROSS_COMPILE` is used to set a command line flag: `--target <triple>`. For
|
||||
example:
|
||||
|
||||
clang --target aarch64-linux-gnu foo.c
|
||||
|
||||
LLVM Utilities
|
||||
--------------
|
||||
|
||||
LLVM has substitutes for GNU binutils utilities. Kbuild supports `LLVM=1`
|
||||
to enable them.
|
||||
|
||||
make LLVM=1
|
||||
|
||||
They can be enabled individually. The full list of the parameters:
|
||||
|
||||
make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\
|
||||
OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \\
|
||||
READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\
|
||||
HOSTLD=ld.lld
|
||||
|
||||
Currently, the integrated assembler is disabled by default. You can pass
|
||||
`LLVM_IAS=1` to enable it.
|
||||
|
||||
Getting Help
|
||||
------------
|
||||
|
||||
- `Website <https://clangbuiltlinux.github.io/>`_
|
||||
- `Mailing List <https://groups.google.com/forum/#!forum/clang-built-linux>`_: <clang-built-linux@googlegroups.com>
|
||||
- `Issue Tracker <https://github.com/ClangBuiltLinux/linux/issues>`_
|
||||
- IRC: #clangbuiltlinux on chat.freenode.net
|
||||
- `Telegram <https://t.me/ClangBuiltLinux>`_: @ClangBuiltLinux
|
||||
- `Wiki <https://github.com/ClangBuiltLinux/linux/wiki>`_
|
||||
- `Beginner Bugs <https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22>`_
|
||||
|
||||
Getting LLVM
|
||||
-------------
|
||||
|
||||
- http://releases.llvm.org/download.html
|
||||
- https://github.com/llvm/llvm-project
|
||||
- https://llvm.org/docs/GettingStarted.html
|
||||
- https://llvm.org/docs/CMake.html
|
||||
- https://apt.llvm.org/
|
||||
- https://www.archlinux.org/packages/extra/x86_64/llvm/
|
||||
- https://github.com/ClangBuiltLinux/tc-build
|
||||
- https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source
|
||||
- https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/
|
|
@ -159,11 +159,15 @@ Byte sequences
|
|||
distance = 16384 + (H << 14) + D
|
||||
state = S (copy S literals after this block)
|
||||
End of stream is reached if distance == 16384
|
||||
In version 1 only, to prevent ambiguity with the RLE case when
|
||||
((distance & 0x803f) == 0x803f) && (261 <= length <= 264), the
|
||||
compressor must not emit block copies where distance and length
|
||||
meet these conditions.
|
||||
|
||||
In version 1 only, this instruction is also used to encode a run of
|
||||
zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
|
||||
In this case, it is followed by a fourth byte, X.
|
||||
run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
|
||||
run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4
|
||||
|
||||
0 0 1 L L L L L (32..63)
|
||||
Copy of small block within 16kB distance (preferably less than 34B)
|
||||
|
|
|
@ -36,8 +36,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
|
|||
:c:type:`v4l2_hsv_encoding` specifies which encoding is used.
|
||||
|
||||
.. note:: The default R'G'B' quantization is full range for all
|
||||
colorspaces except for BT.2020 which uses limited range R'G'B'
|
||||
quantization.
|
||||
colorspaces. HSV formats are always full range.
|
||||
|
||||
.. tabularcolumns:: |p{6.7cm}|p{10.8cm}|
|
||||
|
||||
|
@ -169,8 +168,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
|
|||
- Details
|
||||
* - ``V4L2_QUANTIZATION_DEFAULT``
|
||||
- Use the default quantization encoding as defined by the
|
||||
colorspace. This is always full range for R'G'B' (except for the
|
||||
BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr.
|
||||
colorspace. This is always full range for R'G'B' and HSV.
|
||||
It is usually limited range for Y'CbCr.
|
||||
* - ``V4L2_QUANTIZATION_FULL_RANGE``
|
||||
- Use the full range quantization encoding. I.e. the range [0…1] is
|
||||
mapped to [0…255] (with possible clipping to [1…254] to avoid the
|
||||
|
@ -180,4 +179,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum
|
|||
* - ``V4L2_QUANTIZATION_LIM_RANGE``
|
||||
- Use the limited range quantization encoding. I.e. the range [0…1]
|
||||
is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to
|
||||
[16…240].
|
||||
[16…240]. Limited Range cannot be used with HSV.
|
||||
|
|
|
@ -377,9 +377,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020)
|
|||
The :ref:`itu2020` standard defines the colorspace used by Ultra-high
|
||||
definition television (UHDTV). The default transfer function is
|
||||
``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is
|
||||
``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited
|
||||
range (!), and so is the default Y'CbCr quantization. The chromaticities
|
||||
of the primary colors and the white reference are:
|
||||
``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range.
|
||||
The chromaticities of the primary colors and the white reference are:
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -750,6 +750,11 @@ tcp_tw_reuse - INTEGER
|
|||
experts.
|
||||
Default: 2
|
||||
|
||||
tcp_tw_timeout - INTEGER
|
||||
How long to wait to destroy TIME-WAIT state. The maximum value
|
||||
is 60 seconds, the minimum value is 10 second.
|
||||
Default: 60 seconds
|
||||
|
||||
tcp_window_scaling - BOOLEAN
|
||||
Enable window scaling as defined in RFC1323.
|
||||
|
||||
|
@ -1008,12 +1013,14 @@ icmp_ratelimit - INTEGER
|
|||
icmp_msgs_per_sec - INTEGER
|
||||
Limit maximal number of ICMP packets sent per second from this host.
|
||||
Only messages whose type matches icmp_ratemask (see below) are
|
||||
controlled by this limit.
|
||||
controlled by this limit. For security reasons, the precise count
|
||||
of messages per second is randomized.
|
||||
Default: 1000
|
||||
|
||||
icmp_msgs_burst - INTEGER
|
||||
icmp_msgs_per_sec controls number of ICMP packets sent per second,
|
||||
while icmp_msgs_burst controls the burst size of these packets.
|
||||
For security reasons, the precise burst size is randomized.
|
||||
Default: 50
|
||||
|
||||
icmp_ratemask - INTEGER
|
||||
|
|
|
@ -414,8 +414,8 @@ Send:
|
|||
.can_family = AF_CAN,
|
||||
.can_addr.j1939 = {
|
||||
.name = J1939_NO_NAME;
|
||||
.pgn = 0x30,
|
||||
.addr = 0x12300,
|
||||
.addr = 0x30,
|
||||
.pgn = 0x12300,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -8,3 +8,4 @@ HD-Audio
|
|||
models
|
||||
controls
|
||||
dp-mst
|
||||
realtek-pc-beep
|
||||
|
|
|
@ -216,8 +216,6 @@ alc298-dell-aio
|
|||
ALC298 fixups on Dell AIO machines
|
||||
alc275-dell-xps
|
||||
ALC275 fixups on Dell XPS models
|
||||
alc256-dell-xps13
|
||||
ALC256 fixups on Dell XPS13
|
||||
lenovo-spk-noise
|
||||
Workaround for speaker noise on Lenovo machines
|
||||
lenovo-hotkey
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
===============================
|
||||
Realtek PC Beep Hidden Register
|
||||
===============================
|
||||
|
||||
This file documents the "PC Beep Hidden Register", which is present in certain
|
||||
Realtek HDA codecs and controls a muxer and pair of passthrough mixers that can
|
||||
route audio between pins but aren't themselves exposed as HDA widgets. As far
|
||||
as I can tell, these hidden routes are designed to allow flexible PC Beep output
|
||||
for codecs that don't have mixer widgets in their output paths. Why it's easier
|
||||
to hide a mixer behind an undocumented vendor register than to just expose it
|
||||
as a widget, I have no idea.
|
||||
|
||||
Register Description
|
||||
====================
|
||||
|
||||
The register is accessed via processing coefficient 0x36 on NID 20h. Bits not
|
||||
identified below have no discernible effect on my machine, a Dell XPS 13 9350::
|
||||
|
||||
MSB LSB
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
| |h|S|L| | B |R| | Known bits
|
||||
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
|
||||
|0|0|1|1| 0x7 |0|0x0|1| 0x7 | Reset value
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
|
||||
1Ah input select (B): 2 bits
|
||||
When zero, expose the PC Beep line (from the internal beep generator, when
|
||||
enabled with the Set Beep Generation verb on NID 01h, or else from the
|
||||
external PCBEEP pin) on the 1Ah pin node. When nonzero, expose the headphone
|
||||
jack (or possibly Line In on some machines) input instead. If PC Beep is
|
||||
selected, the 1Ah boost control has no effect.
|
||||
|
||||
Amplify 1Ah loopback, left (L): 1 bit
|
||||
Amplify the left channel of 1Ah before mixing it into outputs as specified
|
||||
by h and S bits. Does not affect the level of 1Ah exposed to other widgets.
|
||||
|
||||
Amplify 1Ah loopback, right (R): 1 bit
|
||||
Amplify the right channel of 1Ah before mixing it into outputs as specified
|
||||
by h and S bits. Does not affect the level of 1Ah exposed to other widgets.
|
||||
|
||||
Loopback 1Ah to 21h [active low] (h): 1 bit
|
||||
When zero, mix 1Ah (possibly with amplification, depending on L and R bits)
|
||||
into 21h (headphone jack on my machine). Mixed signal respects the mute
|
||||
setting on 21h.
|
||||
|
||||
Loopback 1Ah to 14h (S): 1 bit
|
||||
When one, mix 1Ah (possibly with amplification, depending on L and R bits)
|
||||
into 14h (internal speaker on my machine). Mixed signal **ignores** the mute
|
||||
setting on 14h and is present whenever 14h is configured as an output.
|
||||
|
||||
Path diagrams
|
||||
=============
|
||||
|
||||
1Ah input selection (DIV is the PC Beep divider set on NID 01h)::
|
||||
|
||||
<Beep generator> <PCBEEP pin> <Headphone jack>
|
||||
| | |
|
||||
+--DIV--+--!DIV--+ {1Ah boost control}
|
||||
| |
|
||||
+--(b == 0)--+--(b != 0)--+
|
||||
|
|
||||
>1Ah (Beep/Headphone Mic/Line In)<
|
||||
|
||||
Loopback of 1Ah to 21h/14h::
|
||||
|
||||
<1Ah (Beep/Headphone Mic/Line In)>
|
||||
|
|
||||
{amplify if L/R}
|
||||
|
|
||||
+-----!h-----+-----S-----+
|
||||
| |
|
||||
{21h mute control} |
|
||||
| |
|
||||
>21h (Headphone)< >14h (Internal Speaker)<
|
||||
|
||||
Background
|
||||
==========
|
||||
|
||||
All Realtek HDA codecs have a vendor-defined widget with node ID 20h which
|
||||
provides access to a bank of registers that control various codec functions.
|
||||
Registers are read and written via the standard HDA processing coefficient
|
||||
verbs (Set/Get Coefficient Index, Set/Get Processing Coefficient). The node is
|
||||
named "Realtek Vendor Registers" in public datasheets' verb listings and,
|
||||
apart from that, is entirely undocumented.
|
||||
|
||||
This particular register, exposed at coefficient 0x36 and named in commits from
|
||||
Realtek, is of note: unlike most registers, which seem to control detailed
|
||||
amplifier parameters not in scope of the HDA specification, it controls audio
|
||||
routing which could just as easily have been defined using standard HDA mixer
|
||||
and selector widgets.
|
||||
|
||||
Specifically, it selects between two sources for the input pin widget with Node
|
||||
ID (NID) 1Ah: the widget's signal can come either from an audio jack (on my
|
||||
laptop, a Dell XPS 13 9350, it's the headphone jack, but comments in Realtek
|
||||
commits indicate that it might be a Line In on some machines) or from the PC
|
||||
Beep line (which is itself multiplexed between the codec's internal beep
|
||||
generator and external PCBEEP pin, depending on if the beep generator is
|
||||
enabled via verbs on NID 01h). Additionally, it can mix (with optional
|
||||
amplification) that signal onto the 21h and/or 14h output pins.
|
||||
|
||||
The register's reset value is 0x3717, corresponding to PC Beep on 1Ah that is
|
||||
then amplified and mixed into both the headphones and the speakers. Not only
|
||||
does this violate the HDA specification, which says that "[a vendor defined
|
||||
beep input pin] connection may be maintained *only* while the Link reset
|
||||
(**RST#**) is asserted", it means that we cannot ignore the register if we care
|
||||
about the input that 1Ah would otherwise expose or if the PCBEEP trace is
|
||||
poorly shielded and picks up chassis noise (both of which are the case on my
|
||||
machine).
|
||||
|
||||
Unfortunately, there are lots of ways to get this register configuration wrong.
|
||||
Linux, it seems, has gone through most of them. For one, the register resets
|
||||
after S3 suspend: judging by existing code, this isn't the case for all vendor
|
||||
registers, and it's led to some fixes that improve behavior on cold boot but
|
||||
don't last after suspend. Other fixes have successfully switched the 1Ah input
|
||||
away from PC Beep but have failed to disable both loopback paths. On my
|
||||
machine, this means that the headphone input is amplified and looped back to
|
||||
the headphone output, which uses the exact same pins! As you might expect, this
|
||||
causes terrible headphone noise, the character of which is controlled by the
|
||||
1Ah boost control. (If you've seen instructions online to fix XPS 13 headphone
|
||||
noise by changing "Headphone Mic Boost" in ALSA, now you know why.)
|
||||
|
||||
The information here has been obtained through black-box reverse engineering of
|
||||
the ALC256 codec's behavior and is not guaranteed to be correct. It likely
|
||||
also applies for the ALC255, ALC257, ALC235, and ALC236, since those codecs
|
||||
seem to be close relatives of the ALC256. (They all share one initialization
|
||||
function.) Additionally, other codecs like the ALC225 and ALC285 also have this
|
||||
register, judging by existing fixups in ``patch_realtek.c``, but specific
|
||||
data (e.g. node IDs, bit positions, pin mappings) for those codecs may differ
|
||||
from what I've described here.
|
|
@ -172,6 +172,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can
|
|||
be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
|
||||
ioctl() at run-time.
|
||||
|
||||
Creation of the VM will fail if the requested IPA size (whether it is
|
||||
implicit or explicit) is unsupported on the host.
|
||||
|
||||
Please note that configuring the IPA size does not affect the capability
|
||||
exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
|
||||
size of the address translated by the stage2 level (guest physical to
|
||||
|
@ -1132,6 +1135,9 @@ field userspace_addr, which must point at user addressable memory for
|
|||
the entire memory slot size. Any object may back this memory, including
|
||||
anonymous memory, ordinary files, and hugetlbfs.
|
||||
|
||||
On architectures that support a form of address tagging, userspace_addr must
|
||||
be an untagged address.
|
||||
|
||||
It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
|
||||
be identical. This allows large pages in the guest to be backed by large
|
||||
pages in the host.
|
||||
|
@ -4444,9 +4450,11 @@ EOI was received.
|
|||
#define KVM_EXIT_HYPERV_SYNIC 1
|
||||
#define KVM_EXIT_HYPERV_HCALL 2
|
||||
__u32 type;
|
||||
__u32 pad1;
|
||||
union {
|
||||
struct {
|
||||
__u32 msr;
|
||||
__u32 pad2;
|
||||
__u64 control;
|
||||
__u64 evt_page;
|
||||
__u64 msg_page;
|
||||
|
|
|
@ -420,7 +420,7 @@ If the generation number of the spte does not equal the global generation
|
|||
number, it will ignore the cached MMIO information and handle the page
|
||||
fault through the slow path.
|
||||
|
||||
Since only 19 bits are used to store generation-number on mmio spte, all
|
||||
Since only 18 bits are used to store generation-number on mmio spte, all
|
||||
pages are zapped when there is an overflow.
|
||||
|
||||
Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
|
||||
|
|
|
@ -31,3 +31,4 @@ x86-specific Documentation
|
|||
usb-legacy-support
|
||||
i386/index
|
||||
x86_64/index
|
||||
sgx
|
||||
|
|
|
@ -0,0 +1,211 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============================
|
||||
Software Guard eXtensions (SGX)
|
||||
===============================
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Software Guard eXtensions (SGX) hardware enables for user space applications
|
||||
to set aside private memory regions of code and data:
|
||||
|
||||
* Privileged (ring-0) ENCLS functions orchestrate the construction of the.
|
||||
regions.
|
||||
* Unprivileged (ring-3) ENCLU functions allow an application to enter and
|
||||
execute inside the regions.
|
||||
|
||||
These memory regions are called enclaves. An enclave can be only entered at a
|
||||
fixed set of entry points. Each entry point can hold a single hardware thread
|
||||
at a time. While the enclave is loaded from a regular binary file by using
|
||||
ENCLS functions, only the threads inside the enclave can access its memory. The
|
||||
region is denied from outside access by the CPU, and encrypted before it leaves
|
||||
from LLC.
|
||||
|
||||
The support can be determined by
|
||||
|
||||
``grep sgx /proc/cpuinfo``
|
||||
|
||||
SGX must both be supported in the processor and enabled by the BIOS. If SGX
|
||||
appears to be unsupported on a system which has hardware support, ensure
|
||||
support is enabled in the BIOS. If a BIOS presents a choice between "Enabled"
|
||||
and "Software Enabled" modes for SGX, choose "Enabled".
|
||||
|
||||
Enclave Page Cache
|
||||
==================
|
||||
|
||||
SGX utilizes an *Enclave Page Cache (EPC)* to store pages that are associated
|
||||
with an enclave. It is contained in a BIOS-reserved region of physical memory.
|
||||
Unlike pages used for regular memory, pages can only be accessed from outside of
|
||||
the enclave during enclave construction with special, limited SGX instructions.
|
||||
|
||||
Only a CPU executing inside an enclave can directly access enclave memory.
|
||||
However, a CPU executing inside an enclave may access normal memory outside the
|
||||
enclave.
|
||||
|
||||
The kernel manages enclave memory similar to how it treats device memory.
|
||||
|
||||
Enclave Page Types
|
||||
------------------
|
||||
|
||||
**SGX Enclave Control Structure (SECS)**
|
||||
Enclave's address range, attributes and other global data are defined
|
||||
by this structure.
|
||||
|
||||
**Regular (REG)**
|
||||
Regular EPC pages contain the code and data of an enclave.
|
||||
|
||||
**Thread Control Structure (TCS)**
|
||||
Thread Control Structure pages define the entry points to an enclave and
|
||||
track the execution state of an enclave thread.
|
||||
|
||||
**Version Array (VA)**
|
||||
Version Array pages contain 512 slots, each of which can contain a version
|
||||
number for a page evicted from the EPC.
|
||||
|
||||
Enclave Page Cache Map
|
||||
----------------------
|
||||
|
||||
The processor tracks EPC pages in a hardware metadata structure called the
|
||||
*Enclave Page Cache Map (EPCM)*. The EPCM contains an entry for each EPC page
|
||||
which describes the owning enclave, access rights and page type among the other
|
||||
things.
|
||||
|
||||
EPCM permissions are separate from the normal page tables. This prevents the
|
||||
kernel from, for instance, allowing writes to data which an enclave wishes to
|
||||
remain read-only. EPCM permissions may only impose additional restrictions on
|
||||
top of normal x86 page permissions.
|
||||
|
||||
For all intents and purposes, the SGX architecture allows the processor to
|
||||
invalidate all EPCM entries at will. This requires that software be prepared to
|
||||
handle an EPCM fault at any time. In practice, this can happen on events like
|
||||
power transitions when the ephemeral key that encrypts enclave memory is lost.
|
||||
|
||||
Application interface
|
||||
=====================
|
||||
|
||||
Enclave build functions
|
||||
-----------------------
|
||||
|
||||
In addition to the traditional compiler and linker build process, SGX has a
|
||||
separate enclave “build” process. Enclaves must be built before they can be
|
||||
executed (entered). The first step in building an enclave is opening the
|
||||
**/dev/sgx_enclave** device. Since enclave memory is protected from direct
|
||||
access, special privileged instructions are Then used to copy data into enclave
|
||||
pages and establish enclave page permissions.
|
||||
|
||||
.. kernel-doc:: arch/x86/kernel/cpu/sgx/ioctl.c
|
||||
:functions: sgx_ioc_enclave_create
|
||||
sgx_ioc_enclave_add_pages
|
||||
sgx_ioc_enclave_init
|
||||
sgx_ioc_enclave_provision
|
||||
|
||||
Enclave vDSO
|
||||
------------
|
||||
|
||||
Entering an enclave can only be done through SGX-specific EENTER and ERESUME
|
||||
functions, and is a non-trivial process. Because of the complexity of
|
||||
transitioning to and from an enclave, enclaves typically utilize a library to
|
||||
handle the actual transitions. This is roughly analogous to how glibc
|
||||
implementations are used by most applications to wrap system calls.
|
||||
|
||||
Another crucial characteristic of enclaves is that they can generate exceptions
|
||||
as part of their normal operation that need to be handled in the enclave or are
|
||||
unique to SGX.
|
||||
|
||||
Instead of the traditional signal mechanism to handle these exceptions, SGX
|
||||
can leverage special exception fixup provided by the vDSO. The kernel-provided
|
||||
vDSO function wraps low-level transitions to/from the enclave like EENTER and
|
||||
ERESUME. The vDSO function intercepts exceptions that would otherwise generate
|
||||
a signal and return the fault information directly to its caller. This avoids
|
||||
the need to juggle signal handlers.
|
||||
|
||||
.. kernel-doc:: arch/x86/include/uapi/asm/sgx.h
|
||||
:functions: vdso_sgx_enter_enclave_t
|
||||
|
||||
ksgxd
|
||||
=====
|
||||
|
||||
SGX support includes a kernel thread called *ksgxwapd*.
|
||||
|
||||
EPC sanitization
|
||||
----------------
|
||||
|
||||
ksgxd is started when SGX initializes. Enclave memory is typically ready
|
||||
For use when the processor powers on or resets. However, if SGX has been in
|
||||
use since the reset, enclave pages may be in an inconsistent state. This might
|
||||
occur after a crash and kexec() cycle, for instance. At boot, ksgxd
|
||||
reinitializes all enclave pages so that they can be allocated and re-used.
|
||||
|
||||
The sanitization is done by going through EPC address space and applying the
|
||||
EREMOVE function to each physical page. Some enclave pages like SECS pages have
|
||||
hardware dependencies on other pages which prevents EREMOVE from functioning.
|
||||
Executing two EREMOVE passes removes the dependencies.
|
||||
|
||||
Page reclaimer
|
||||
--------------
|
||||
|
||||
Similar to the core kswapd, ksgxd, is responsible for managing the
|
||||
overcommitment of enclave memory. If the system runs out of enclave memory,
|
||||
*ksgxwapd* “swaps” enclave memory to normal memory.
|
||||
|
||||
Launch Control
|
||||
==============
|
||||
|
||||
SGX provides a launch control mechanism. After all enclave pages have been
|
||||
copied, kernel executes EINIT function, which initializes the enclave. Only after
|
||||
this the CPU can execute inside the enclave.
|
||||
|
||||
ENIT function takes an RSA-3072 signature of the enclave measurement. The function
|
||||
checks that the measurement is correct and signature is signed with the key
|
||||
hashed to the four **IA32_SGXLEPUBKEYHASH{0, 1, 2, 3}** MSRs representing the
|
||||
SHA256 of a public key.
|
||||
|
||||
Those MSRs can be configured by the BIOS to be either readable or writable.
|
||||
Linux supports only writable configuration in order to give full control to the
|
||||
kernel on launch control policy. Before calling EINIT function, the driver sets
|
||||
the MSRs to match the enclave's signing key.
|
||||
|
||||
Encryption engines
|
||||
==================
|
||||
|
||||
In order to conceal the enclave data while it is out of the CPU package, the
|
||||
memory controller has an encryption engine to transparently encrypt and decrypt
|
||||
enclave memory.
|
||||
|
||||
In CPUs prior to Ice Lake, the Memory Encryption Engine (MEE) is used to
|
||||
encrypt pages leaving the CPU caches. MEE uses a n-ary Merkle tree with root in
|
||||
SRAM to maintain integrity of the encrypted data. This provides integrity and
|
||||
anti-replay protection but does not scale to large memory sizes because the time
|
||||
required to update the Merkle tree grows logarithmically in relation to the
|
||||
memory size.
|
||||
|
||||
CPUs starting from Icelake use Total Memory Encryption (TME) in the place of
|
||||
MEE. TME-based SGX implementations do not have an integrity Merkle tree, which
|
||||
means integrity and replay-attacks are not mitigated. B, it includes
|
||||
additional changes to prevent cipher text from being returned and SW memory
|
||||
aliases from being Created.
|
||||
|
||||
DMA to enclave memory is blocked by range registers on both MEE and TME systems
|
||||
(SDM section 41.10).
|
||||
|
||||
Usage Models
|
||||
============
|
||||
|
||||
Shared Library
|
||||
--------------
|
||||
|
||||
Sensitive data and the code that acts on it is partitioned from the application
|
||||
into a separate library. The library is then linked as a DSO which can be loaded
|
||||
into an enclave. The application can then make individual function calls into
|
||||
the enclave through special SGX instructions. A run-time within the enclave is
|
||||
configured to marshal function parameters into and out of the enclave and to
|
||||
call the correct library function.
|
||||
|
||||
Application Container
|
||||
---------------------
|
||||
|
||||
An application may be loaded into a container enclave which is specially
|
||||
configured with a library OS and run-time which permits the application to run.
|
||||
The enclave run-time and library OS work together to execute the application
|
||||
when a thread enters the enclave.
|
|
@ -41,6 +41,8 @@ Package
|
|||
Packages contain a number of cores plus shared resources, e.g. DRAM
|
||||
controller, shared caches etc.
|
||||
|
||||
Modern systems may also use the term 'Die' for package.
|
||||
|
||||
AMD nomenclature for package is 'Node'.
|
||||
|
||||
Package-related topology information in the kernel:
|
||||
|
@ -53,11 +55,18 @@ Package-related topology information in the kernel:
|
|||
|
||||
The number of dies in a package. This information is retrieved via CPUID.
|
||||
|
||||
- cpuinfo_x86.cpu_die_id:
|
||||
|
||||
The physical ID of the die. This information is retrieved via CPUID.
|
||||
|
||||
- cpuinfo_x86.phys_proc_id:
|
||||
|
||||
The physical ID of the package. This information is retrieved via CPUID
|
||||
and deduced from the APIC IDs of the cores in the package.
|
||||
|
||||
Modern systems use this value for the socket. There may be multiple
|
||||
packages within a socket. This value may differ from cpu_die_id.
|
||||
|
||||
- cpuinfo_x86.logical_proc_id:
|
||||
|
||||
The logical ID of the package. As we do not trust BIOSes to enumerate the
|
||||
|
|
|
@ -0,0 +1,199 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Using FS and GS segments in user space applications
|
||||
===================================================
|
||||
|
||||
The x86 architecture supports segmentation. Instructions which access
|
||||
memory can use segment register based addressing mode. The following
|
||||
notation is used to address a byte within a segment:
|
||||
|
||||
Segment-register:Byte-address
|
||||
|
||||
The segment base address is added to the Byte-address to compute the
|
||||
resulting virtual address which is accessed. This allows to access multiple
|
||||
instances of data with the identical Byte-address, i.e. the same code. The
|
||||
selection of a particular instance is purely based on the base-address in
|
||||
the segment register.
|
||||
|
||||
In 32-bit mode the CPU provides 6 segments, which also support segment
|
||||
limits. The limits can be used to enforce address space protections.
|
||||
|
||||
In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
|
||||
always 0 to provide a full 64bit address space. The FS and GS segments are
|
||||
still functional in 64-bit mode.
|
||||
|
||||
Common FS and GS usage
|
||||
------------------------------
|
||||
|
||||
The FS segment is commonly used to address Thread Local Storage (TLS). FS
|
||||
is usually managed by runtime code or a threading library. Variables
|
||||
declared with the '__thread' storage class specifier are instantiated per
|
||||
thread and the compiler emits the FS: address prefix for accesses to these
|
||||
variables. Each thread has its own FS base address so common code can be
|
||||
used without complex address offset calculations to access the per thread
|
||||
instances. Applications should not use FS for other purposes when they use
|
||||
runtimes or threading libraries which manage the per thread FS.
|
||||
|
||||
The GS segment has no common use and can be used freely by
|
||||
applications. GCC and Clang support GS based addressing via address space
|
||||
identifiers.
|
||||
|
||||
Reading and writing the FS/GS base address
|
||||
------------------------------------------
|
||||
|
||||
There exist two mechanisms to read and write the FS/GS base address:
|
||||
|
||||
- the arch_prctl() system call
|
||||
|
||||
- the FSGSBASE instruction family
|
||||
|
||||
Accessing FS/GS base with arch_prctl()
|
||||
--------------------------------------
|
||||
|
||||
The arch_prctl(2) based mechanism is available on all 64-bit CPUs and all
|
||||
kernel versions.
|
||||
|
||||
Reading the base:
|
||||
|
||||
arch_prctl(ARCH_GET_FS, &fsbase);
|
||||
arch_prctl(ARCH_GET_GS, &gsbase);
|
||||
|
||||
Writing the base:
|
||||
|
||||
arch_prctl(ARCH_SET_FS, fsbase);
|
||||
arch_prctl(ARCH_SET_GS, gsbase);
|
||||
|
||||
The ARCH_SET_GS prctl may be disabled depending on kernel configuration
|
||||
and security settings.
|
||||
|
||||
Accessing FS/GS base with the FSGSBASE instructions
|
||||
---------------------------------------------------
|
||||
|
||||
With the Ivy Bridge CPU generation Intel introduced a new set of
|
||||
instructions to access the FS and GS base registers directly from user
|
||||
space. These instructions are also supported on AMD Family 17H CPUs. The
|
||||
following instructions are available:
|
||||
|
||||
=============== ===========================
|
||||
RDFSBASE %reg Read the FS base register
|
||||
RDGSBASE %reg Read the GS base register
|
||||
WRFSBASE %reg Write the FS base register
|
||||
WRGSBASE %reg Write the GS base register
|
||||
=============== ===========================
|
||||
|
||||
The instructions avoid the overhead of the arch_prctl() syscall and allow
|
||||
more flexible usage of the FS/GS addressing modes in user space
|
||||
applications. This does not prevent conflicts between threading libraries
|
||||
and runtimes which utilize FS and applications which want to use it for
|
||||
their own purpose.
|
||||
|
||||
FSGSBASE instructions enablement
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
|
||||
available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
|
||||
|
||||
The availability of the instructions does not enable them
|
||||
automatically. The kernel has to enable them explicitly in CR4. The
|
||||
reason for this is that older kernels make assumptions about the values in
|
||||
the GS register and enforce them when GS base is set via
|
||||
arch_prctl(). Allowing user space to write arbitrary values to GS base
|
||||
would violate these assumptions and cause malfunction.
|
||||
|
||||
On kernels which do not enable FSGSBASE the execution of the FSGSBASE
|
||||
instructions will fault with a #UD exception.
|
||||
|
||||
The kernel provides reliable information about the enabled state in the
|
||||
ELF AUX vector. If the HWCAP2_FSGSBASE bit is set in the AUX vector, the
|
||||
kernel has FSGSBASE instructions enabled and applications can use them.
|
||||
The following code example shows how this detection works::
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include <elf.h>
|
||||
|
||||
/* Will be eventually in asm/hwcap.h */
|
||||
#ifndef HWCAP2_FSGSBASE
|
||||
#define HWCAP2_FSGSBASE (1 << 1)
|
||||
#endif
|
||||
|
||||
....
|
||||
|
||||
unsigned val = getauxval(AT_HWCAP2);
|
||||
|
||||
if (val & HWCAP2_FSGSBASE)
|
||||
printf("FSGSBASE enabled\n");
|
||||
|
||||
FSGSBASE instructions compiler support
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
|
||||
instructions. Clang 5 supports them as well.
|
||||
|
||||
=================== ===========================
|
||||
_readfsbase_u64() Read the FS base register
|
||||
_readfsbase_u64() Read the GS base register
|
||||
_writefsbase_u64() Write the FS base register
|
||||
_writegsbase_u64() Write the GS base register
|
||||
=================== ===========================
|
||||
|
||||
To utilize these instrinsics <immintrin.h> must be included in the source
|
||||
code and the compiler option -mfsgsbase has to be added.
|
||||
|
||||
Compiler support for FS/GS based addressing
|
||||
-------------------------------------------
|
||||
|
||||
GCC version 6 and newer provide support for FS/GS based addressing via
|
||||
Named Address Spaces. GCC implements the following address space
|
||||
identifiers for x86:
|
||||
|
||||
========= ====================================
|
||||
__seg_fs Variable is addressed relative to FS
|
||||
__seg_gs Variable is addressed relative to GS
|
||||
========= ====================================
|
||||
|
||||
The preprocessor symbols __SEG_FS and __SEG_GS are defined when these
|
||||
address spaces are supported. Code which implements fallback modes should
|
||||
check whether these symbols are defined. Usage example::
|
||||
|
||||
#ifdef __SEG_GS
|
||||
|
||||
long data0 = 0;
|
||||
long data1 = 1;
|
||||
|
||||
long __seg_gs *ptr;
|
||||
|
||||
/* Check whether FSGSBASE is enabled by the kernel (HWCAP2_FSGSBASE) */
|
||||
....
|
||||
|
||||
/* Set GS base to point to data0 */
|
||||
_writegsbase_u64(&data0);
|
||||
|
||||
/* Access offset 0 of GS */
|
||||
ptr = 0;
|
||||
printf("data0 = %ld\n", *ptr);
|
||||
|
||||
/* Set GS base to point to data1 */
|
||||
_writegsbase_u64(&data1);
|
||||
/* ptr still addresses offset 0! */
|
||||
printf("data1 = %ld\n", *ptr);
|
||||
|
||||
|
||||
Clang does not provide the GCC address space identifiers, but it provides
|
||||
address spaces via an attribute based mechanism in Clang 2.6 and newer
|
||||
versions:
|
||||
|
||||
==================================== =====================================
|
||||
__attribute__((address_space(256)) Variable is addressed relative to GS
|
||||
__attribute__((address_space(257)) Variable is addressed relative to FS
|
||||
==================================== =====================================
|
||||
|
||||
FS/GS based addressing with inline assembly
|
||||
-------------------------------------------
|
||||
|
||||
In case the compiler does not support address spaces, inline assembly can
|
||||
be used for FS/GS based addressing mode::
|
||||
|
||||
mov %fs:offset, %reg
|
||||
mov %gs:offset, %reg
|
||||
|
||||
mov %reg, %fs:offset
|
||||
mov %reg, %gs:offset
|
|
@ -14,3 +14,4 @@ x86_64 Support
|
|||
fake-numa-for-cpusets
|
||||
cpu-hotplug-spec
|
||||
machinecheck
|
||||
fsgs
|
||||
|
|
|
@ -82,7 +82,8 @@ Default MMUv2-compatible layout::
|
|||
+------------------+
|
||||
| VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB
|
||||
+------------------+ VMALLOC_END
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE
|
||||
| remap area 1 |
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
|
||||
|
@ -124,7 +125,8 @@ Default MMUv2-compatible layout::
|
|||
+------------------+
|
||||
| VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB
|
||||
+------------------+ VMALLOC_END
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE
|
||||
| remap area 1 |
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
|
||||
|
@ -167,7 +169,8 @@ Default MMUv2-compatible layout::
|
|||
+------------------+
|
||||
| VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB
|
||||
+------------------+ VMALLOC_END
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE
|
||||
| remap area 1 |
|
||||
+------------------+
|
||||
| Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE
|
||||
|
|
16
MAINTAINERS
16
MAINTAINERS
|
@ -3054,6 +3054,7 @@ R: Martin KaFai Lau <kafai@fb.com>
|
|||
R: Song Liu <songliubraving@fb.com>
|
||||
R: Yonghong Song <yhs@fb.com>
|
||||
R: Andrii Nakryiko <andriin@fb.com>
|
||||
R: KP Singh <kpsingh@chromium.org>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
|
||||
|
@ -4028,6 +4029,7 @@ B: https://github.com/ClangBuiltLinux/linux/issues
|
|||
C: irc://chat.freenode.net/clangbuiltlinux
|
||||
S: Supported
|
||||
K: \b(?i:clang|llvm)\b
|
||||
F: Documentation/kbuild/llvm.rst
|
||||
|
||||
CLEANCACHE API
|
||||
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
|
@ -8516,6 +8518,19 @@ F: Documentation/x86/intel_txt.rst
|
|||
F: include/linux/tboot.h
|
||||
F: arch/x86/kernel/tboot.c
|
||||
|
||||
INTEL SGX
|
||||
M: Jarkko Sakkinen <jarkko@kernel.org>
|
||||
L: linux-sgx@vger.kernel.org
|
||||
S: Supported
|
||||
Q: https://patchwork.kernel.org/project/intel-sgx/list/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-sgx.git
|
||||
F: Documentation/x86/sgx.rst
|
||||
F: arch/x86/entry/vdso/vsgx.S
|
||||
F: arch/x86/include/uapi/asm/sgx.h
|
||||
F: arch/x86/kernel/cpu/sgx/*
|
||||
F: tools/testing/selftests/sgx/*
|
||||
K: \bSGX_
|
||||
|
||||
INTERCONNECT API
|
||||
M: Georgi Djakov <georgi.djakov@linaro.org>
|
||||
L: linux-pm@vger.kernel.org
|
||||
|
@ -9144,6 +9159,7 @@ F: include/linux/skmsg.h
|
|||
F: net/core/skmsg.c
|
||||
F: net/core/sock_map.c
|
||||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
|
||||
LANTIQ / INTEL Ethernet drivers
|
||||
M: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
|
|
156
Makefile
156
Makefile
|
@ -1,8 +1,15 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
# dist_make: Tencent Dist Makefile, which contains dist-* make targets
|
||||
ifneq ($(shell echo $(MAKECMDGOALS) | grep "^dist-"),)
|
||||
include dist/Makefile
|
||||
else
|
||||
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 4
|
||||
SUBLEVEL = 32
|
||||
EXTRAVERSION = -1
|
||||
SUBLEVEL = 119
|
||||
EXTRAVERSION = -20
|
||||
NAME = Kleptomaniac Octopus
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -394,8 +401,13 @@ HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null)
|
|||
HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null)
|
||||
HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null)
|
||||
|
||||
ifneq ($(LLVM),)
|
||||
HOSTCC = clang
|
||||
HOSTCXX = clang++
|
||||
else
|
||||
HOSTCC = gcc
|
||||
HOSTCXX = g++
|
||||
endif
|
||||
KBUILD_HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 \
|
||||
-fomit-frame-pointer -std=gnu89 $(HOST_LFS_CFLAGS) \
|
||||
$(HOSTCFLAGS)
|
||||
|
@ -404,28 +416,46 @@ KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS)
|
|||
KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
|
||||
|
||||
# Make variables (CC, etc...)
|
||||
AS = $(CROSS_COMPILE)as
|
||||
LD = $(CROSS_COMPILE)ld
|
||||
CC = $(CROSS_COMPILE)gcc
|
||||
CPP = $(CC) -E
|
||||
ifneq ($(LLVM),)
|
||||
CC = clang
|
||||
LD = ld.lld
|
||||
AR = llvm-ar
|
||||
NM = llvm-nm
|
||||
OBJCOPY = llvm-objcopy
|
||||
OBJDUMP = llvm-objdump
|
||||
READELF = llvm-readelf
|
||||
OBJSIZE = llvm-size
|
||||
STRIP = llvm-strip
|
||||
else
|
||||
CC = $(CROSS_COMPILE)gcc
|
||||
LD = $(CROSS_COMPILE)ld
|
||||
AR = $(CROSS_COMPILE)ar
|
||||
NM = $(CROSS_COMPILE)nm
|
||||
STRIP = $(CROSS_COMPILE)strip
|
||||
OBJCOPY = $(CROSS_COMPILE)objcopy
|
||||
OBJDUMP = $(CROSS_COMPILE)objdump
|
||||
READELF = $(CROSS_COMPILE)readelf
|
||||
OBJSIZE = $(CROSS_COMPILE)size
|
||||
STRIP = $(CROSS_COMPILE)strip
|
||||
endif
|
||||
PAHOLE = pahole
|
||||
RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids
|
||||
LEX = flex
|
||||
YACC = bison
|
||||
AWK = awk
|
||||
INSTALLKERNEL := installkernel
|
||||
DEPMOD = /sbin/depmod
|
||||
DEPMOD = depmod
|
||||
PERL = perl
|
||||
PYTHON = python
|
||||
PYTHON2 = python2
|
||||
PYTHON3 = python3
|
||||
CHECK = sparse
|
||||
BASH = bash
|
||||
KGZIP = gzip
|
||||
KBZIP2 = bzip2
|
||||
KLZOP = lzop
|
||||
LZMA = lzma
|
||||
LZ4 = lz4c
|
||||
XZ = xz
|
||||
|
||||
CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
|
||||
-Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF)
|
||||
|
@ -458,7 +488,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE
|
|||
KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \
|
||||
-fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
|
||||
-Werror=implicit-function-declaration -Werror=implicit-int \
|
||||
-Wno-format-security \
|
||||
-Werror=return-type -Wno-format-security \
|
||||
-std=gnu89
|
||||
KBUILD_CPPFLAGS := -D__KERNEL__
|
||||
KBUILD_AFLAGS_KERNEL :=
|
||||
|
@ -471,9 +501,10 @@ KBUILD_LDFLAGS :=
|
|||
GCC_PLUGINS_CFLAGS :=
|
||||
CLANG_FLAGS :=
|
||||
|
||||
export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
|
||||
export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE PAHOLE LEX YACC AWK INSTALLKERNEL
|
||||
export PERL PYTHON PYTHON2 PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
|
||||
export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
|
||||
export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
|
||||
export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
|
||||
export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ
|
||||
export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
|
||||
|
||||
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS
|
||||
|
@ -528,13 +559,13 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
|
|||
ifneq ($(CROSS_COMPILE),)
|
||||
CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
|
||||
GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
|
||||
CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)
|
||||
CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE))
|
||||
GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..)
|
||||
endif
|
||||
ifneq ($(GCC_TOOLCHAIN),)
|
||||
CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN)
|
||||
endif
|
||||
ifeq ($(shell $(AS) --version 2>&1 | head -n 1 | grep clang),)
|
||||
ifneq ($(LLVM_IAS),1)
|
||||
CLANG_FLAGS += -no-integrated-as
|
||||
endif
|
||||
CLANG_FLAGS += -Werror=unknown-warning-option
|
||||
|
@ -587,12 +618,8 @@ KBUILD_MODULES :=
|
|||
KBUILD_BUILTIN := 1
|
||||
|
||||
# If we have only "make modules", don't compile built-in objects.
|
||||
# When we're building modules with modversions, we need to consider
|
||||
# the built-in objects during the descend as well, in order to
|
||||
# make sure the checksums are up to date before we record them.
|
||||
|
||||
ifeq ($(MAKECMDGOALS),modules)
|
||||
KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1)
|
||||
KBUILD_BUILTIN :=
|
||||
endif
|
||||
|
||||
# If we have "make <whatever> modules", compile modules
|
||||
|
@ -707,12 +734,9 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
|||
KBUILD_CFLAGS += -Os
|
||||
endif
|
||||
|
||||
ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED
|
||||
KBUILD_CFLAGS += -Wno-maybe-uninitialized
|
||||
endif
|
||||
|
||||
# Tell gcc to never replace conditional load with a non-conditional one
|
||||
KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
|
||||
KBUILD_CFLAGS += $(call cc-option,-fno-allow-store-data-races)
|
||||
|
||||
include scripts/Makefile.kcov
|
||||
include scripts/Makefile.gcc-plugins
|
||||
|
@ -749,17 +773,20 @@ KBUILD_CFLAGS += -Wno-tautological-compare
|
|||
KBUILD_CFLAGS += -mno-global-merge
|
||||
else
|
||||
|
||||
# These warnings generated too much noise in a regular build.
|
||||
# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
|
||||
KBUILD_CFLAGS += -Wno-unused-but-set-variable
|
||||
|
||||
# Warn about unmarked fall-throughs in switch statement.
|
||||
# Disabled for clang while comment to attribute conversion happens and
|
||||
# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
|
||||
KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,)
|
||||
endif
|
||||
|
||||
# These warnings generated too much noise in a regular build.
|
||||
# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
|
||||
|
||||
# These result in bogus false positives
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
|
||||
|
||||
ifdef CONFIG_FRAME_POINTER
|
||||
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
|
||||
else
|
||||
|
@ -786,8 +813,11 @@ DEBUG_CFLAGS += -gsplit-dwarf
|
|||
else
|
||||
DEBUG_CFLAGS += -g
|
||||
endif
|
||||
ifneq ($(LLVM_IAS),1)
|
||||
KBUILD_AFLAGS += -Wa,-gdwarf-2
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef CONFIG_DEBUG_INFO_DWARF4
|
||||
DEBUG_CFLAGS += -gdwarf-4
|
||||
endif
|
||||
|
@ -860,6 +890,17 @@ KBUILD_CFLAGS += -Wno-pointer-sign
|
|||
# disable stringop warnings in gcc 8+
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
|
||||
|
||||
# We'll want to enable this eventually, but it's not going away for 5.7 at least
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
|
||||
|
||||
# Another good warning that we'll want to enable eventually
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
|
||||
|
||||
# Enabled with W=2, disabled by default as noisy
|
||||
KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
|
||||
|
||||
# disable invalid "can't wrap" optimizations for signed / pointers
|
||||
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
|
||||
|
||||
|
@ -890,12 +931,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
|
|||
# change __FILE__ to the relative path from the srctree
|
||||
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
|
||||
|
||||
# ensure -fcf-protection is disabled when using retpoline as it is
|
||||
# incompatible with -mindirect-branch=thunk-extern
|
||||
ifdef CONFIG_RETPOLINE
|
||||
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
|
||||
endif
|
||||
|
||||
include scripts/Makefile.kasan
|
||||
include scripts/Makefile.extrawarn
|
||||
include scripts/Makefile.ubsan
|
||||
|
@ -977,10 +1012,10 @@ export mod_strip_cmd
|
|||
mod_compress_cmd = true
|
||||
ifdef CONFIG_MODULE_COMPRESS
|
||||
ifdef CONFIG_MODULE_COMPRESS_GZIP
|
||||
mod_compress_cmd = gzip -n -f
|
||||
mod_compress_cmd = $(KGZIP) -n -f
|
||||
endif # CONFIG_MODULE_COMPRESS_GZIP
|
||||
ifdef CONFIG_MODULE_COMPRESS_XZ
|
||||
mod_compress_cmd = xz -f
|
||||
mod_compress_cmd = $(XZ) -f
|
||||
endif # CONFIG_MODULE_COMPRESS_XZ
|
||||
endif # CONFIG_MODULE_COMPRESS
|
||||
export mod_compress_cmd
|
||||
|
@ -996,9 +1031,10 @@ export mod_sign_cmd
|
|||
|
||||
HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
|
||||
|
||||
ifdef CONFIG_STACK_VALIDATION
|
||||
has_libelf := $(call try-run,\
|
||||
has_libelf = $(call try-run,\
|
||||
echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
|
||||
|
||||
ifdef CONFIG_STACK_VALIDATION
|
||||
ifeq ($(has_libelf),1)
|
||||
objtool_target := tools/objtool FORCE
|
||||
else
|
||||
|
@ -1007,6 +1043,14 @@ ifdef CONFIG_STACK_VALIDATION
|
|||
endif
|
||||
endif
|
||||
|
||||
ifdef CONFIG_DEBUG_INFO_BTF
|
||||
ifeq ($(has_libelf),1)
|
||||
resolve_btfids_target := tools/bpf/resolve_btfids FORCE
|
||||
else
|
||||
ERROR_RESOLVE_BTFIDS := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
PHONY += prepare0
|
||||
|
||||
export MODORDER := $(extmod-prefix)modules.order
|
||||
|
@ -1111,7 +1155,7 @@ prepare0: archprepare
|
|||
$(Q)$(MAKE) $(build)=.
|
||||
|
||||
# All the preparing..
|
||||
prepare: prepare0 prepare-objtool
|
||||
prepare: prepare0 prepare-objtool prepare-resolve_btfids
|
||||
|
||||
# Support for using generic headers in asm-generic
|
||||
asm-generic := -f $(srctree)/scripts/Makefile.asm-generic obj
|
||||
|
@ -1124,7 +1168,7 @@ uapi-asm-generic:
|
|||
$(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/uapi/asm \
|
||||
generic=include/uapi/asm-generic
|
||||
|
||||
PHONY += prepare-objtool
|
||||
PHONY += prepare-objtool prepare-resolve_btfids
|
||||
prepare-objtool: $(objtool_target)
|
||||
ifeq ($(SKIP_STACK_VALIDATION),1)
|
||||
ifdef CONFIG_UNWINDER_ORC
|
||||
|
@ -1135,6 +1179,11 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
prepare-resolve_btfids: $(resolve_btfids_target)
|
||||
ifeq ($(ERROR_RESOLVE_BTFIDS),1)
|
||||
@echo "error: Cannot resolve BTF IDs for CONFIG_DEBUG_INFO_BTF, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
|
||||
@false
|
||||
endif
|
||||
# Generate some files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
@ -1151,11 +1200,19 @@ define filechk_utsrelease.h
|
|||
endef
|
||||
|
||||
define filechk_version.h
|
||||
if [ $(SUBLEVEL) -gt 255 ]; then \
|
||||
echo \#define LINUX_VERSION_CODE $(shell \
|
||||
expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \
|
||||
echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))'
|
||||
expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + 255); \
|
||||
else \
|
||||
echo \#define LINUX_VERSION_CODE $(shell \
|
||||
expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
|
||||
fi; \
|
||||
echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + \
|
||||
((c) > 255 ? 255 : (c)))'
|
||||
endef
|
||||
|
||||
$(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0)
|
||||
$(version_h): SUBLEVEL := $(if $(SUBLEVEL), $(SUBLEVEL), 0)
|
||||
$(version_h): FORCE
|
||||
$(call filechk,version.h)
|
||||
$(Q)rm -f $(old_version_h)
|
||||
|
@ -1238,11 +1295,15 @@ ifneq ($(dtstree),)
|
|||
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
|
||||
|
||||
PHONY += dtbs dtbs_install dtbs_check
|
||||
dtbs dtbs_check: include/config/kernel.release scripts_dtc
|
||||
dtbs: include/config/kernel.release scripts_dtc
|
||||
$(Q)$(MAKE) $(build)=$(dtstree)
|
||||
|
||||
ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),)
|
||||
dtbs: dt_binding_check
|
||||
endif
|
||||
|
||||
dtbs_check: export CHECK_DTBS=1
|
||||
dtbs_check: dt_binding_check
|
||||
dtbs_check: dtbs
|
||||
|
||||
dtbs_install:
|
||||
$(Q)$(MAKE) $(dtbinst)=$(dtstree)
|
||||
|
@ -1270,6 +1331,13 @@ ifdef CONFIG_MODULES
|
|||
|
||||
all: modules
|
||||
|
||||
# When we're building modules with modversions, we need to consider
|
||||
# the built-in objects during the descend as well, in order to
|
||||
# make sure the checksums are up to date before we record them.
|
||||
ifdef CONFIG_MODVERSIONS
|
||||
KBUILD_BUILTIN := 1
|
||||
endif
|
||||
|
||||
# Build modules
|
||||
#
|
||||
# A module can be listed more than once in obj-m resulting in
|
||||
|
@ -1823,3 +1891,5 @@ FORCE:
|
|||
# Declare the contents of the PHONY variable as phony. We keep that
|
||||
# information in a variable so we can use it in if_changed and friends.
|
||||
.PHONY: $(PHONY)
|
||||
|
||||
endif # dist_make
|
||||
|
|
23
arch/Kconfig
23
arch/Kconfig
|
@ -131,6 +131,22 @@ config UPROBES
|
|||
managed by the kernel and kept transparent to the probed
|
||||
application. )
|
||||
|
||||
config HAVE_64BIT_ALIGNED_ACCESS
|
||||
def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
help
|
||||
Some architectures require 64 bit accesses to be 64 bit
|
||||
aligned, which also requires structs containing 64 bit values
|
||||
to be 64 bit aligned too. This includes some 32 bit
|
||||
architectures which can do 64 bit accesses, as well as 64 bit
|
||||
architectures without unaligned access.
|
||||
|
||||
This symbol should be selected by an architecture if 64 bit
|
||||
accesses are required to be 64 bit aligned in this way even
|
||||
though it is not a 64 bit architecture.
|
||||
|
||||
See Documentation/unaligned-memory-access.txt for more
|
||||
information on the topic of unaligned memory accesses.
|
||||
|
||||
config HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
bool
|
||||
help
|
||||
|
@ -405,6 +421,13 @@ config MMU_GATHER_NO_RANGE
|
|||
config HAVE_MMU_GATHER_NO_GATHER
|
||||
bool
|
||||
|
||||
config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
|
||||
bool
|
||||
help
|
||||
Temporary select until all architectures can be converted to have
|
||||
irqs disabled over activate_mm. Architectures that do IPI based TLB
|
||||
shootdowns should enable this.
|
||||
|
||||
config ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
bool
|
||||
|
||||
|
|
|
@ -36,7 +36,6 @@ CONFIG_BLK_DEV_CY82C693=y
|
|||
CONFIG_SCSI=y
|
||||
CONFIG_BLK_DEV_SD=y
|
||||
CONFIG_BLK_DEV_SR=y
|
||||
CONFIG_BLK_DEV_SR_VENDOR=y
|
||||
CONFIG_SCSI_AIC7XXX=m
|
||||
CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
|
||||
# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
|
||||
|
|
|
@ -322,14 +322,18 @@ static inline int __is_mmio(const volatile void __iomem *addr)
|
|||
#if IO_CONCAT(__IO_PREFIX,trivial_io_bw)
|
||||
extern inline unsigned int ioread8(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern inline unsigned int ioread16(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -370,7 +374,9 @@ extern inline void outw(u16 b, unsigned long port)
|
|||
#if IO_CONCAT(__IO_PREFIX,trivial_io_lq)
|
||||
extern inline unsigned int ioread32(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -415,14 +421,18 @@ extern inline void __raw_writew(u16 b, volatile void __iomem *addr)
|
|||
|
||||
extern inline u8 readb(const volatile void __iomem *addr)
|
||||
{
|
||||
u8 ret = __raw_readb(addr);
|
||||
u8 ret;
|
||||
mb();
|
||||
ret = __raw_readb(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern inline u16 readw(const volatile void __iomem *addr)
|
||||
{
|
||||
u16 ret = __raw_readw(addr);
|
||||
u16 ret;
|
||||
mb();
|
||||
ret = __raw_readw(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -463,14 +473,18 @@ extern inline void __raw_writeq(u64 b, volatile void __iomem *addr)
|
|||
|
||||
extern inline u32 readl(const volatile void __iomem *addr)
|
||||
{
|
||||
u32 ret = __raw_readl(addr);
|
||||
u32 ret;
|
||||
mb();
|
||||
ret = __raw_readl(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern inline u64 readq(const volatile void __iomem *addr)
|
||||
{
|
||||
u64 ret = __raw_readq(addr);
|
||||
u64 ret;
|
||||
mb();
|
||||
ret = __raw_readq(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -488,10 +502,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr)
|
|||
}
|
||||
#endif
|
||||
|
||||
#define ioread16be(p) be16_to_cpu(ioread16(p))
|
||||
#define ioread32be(p) be32_to_cpu(ioread32(p))
|
||||
#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p))
|
||||
#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p))
|
||||
#define ioread16be(p) swab16(ioread16(p))
|
||||
#define ioread32be(p) swab32(ioread32(p))
|
||||
#define iowrite16be(v,p) iowrite16(swab16(v), (p))
|
||||
#define iowrite32be(v,p) iowrite32(swab32(v), (p))
|
||||
|
||||
#define inb_p inb
|
||||
#define inw_p inw
|
||||
|
@ -499,14 +513,44 @@ extern inline void writeq(u64 b, volatile void __iomem *addr)
|
|||
#define outb_p outb
|
||||
#define outw_p outw
|
||||
#define outl_p outl
|
||||
#define readb_relaxed(addr) __raw_readb(addr)
|
||||
#define readw_relaxed(addr) __raw_readw(addr)
|
||||
#define readl_relaxed(addr) __raw_readl(addr)
|
||||
#define readq_relaxed(addr) __raw_readq(addr)
|
||||
#define writeb_relaxed(b, addr) __raw_writeb(b, addr)
|
||||
#define writew_relaxed(b, addr) __raw_writew(b, addr)
|
||||
#define writel_relaxed(b, addr) __raw_writel(b, addr)
|
||||
#define writeq_relaxed(b, addr) __raw_writeq(b, addr)
|
||||
|
||||
extern u8 readb_relaxed(const volatile void __iomem *addr);
|
||||
extern u16 readw_relaxed(const volatile void __iomem *addr);
|
||||
extern u32 readl_relaxed(const volatile void __iomem *addr);
|
||||
extern u64 readq_relaxed(const volatile void __iomem *addr);
|
||||
|
||||
#if IO_CONCAT(__IO_PREFIX,trivial_io_bw)
|
||||
extern inline u8 readb_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readb(addr);
|
||||
}
|
||||
|
||||
extern inline u16 readw_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readw(addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if IO_CONCAT(__IO_PREFIX,trivial_io_lq)
|
||||
extern inline u32 readl_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readl(addr);
|
||||
}
|
||||
|
||||
extern inline u64 readq_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readq(addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define writeb_relaxed writeb
|
||||
#define writew_relaxed writew
|
||||
#define writel_relaxed writel
|
||||
#define writeq_relaxed writeq
|
||||
|
||||
/*
|
||||
* String version of IO memory access ops:
|
||||
|
|
|
@ -124,6 +124,8 @@
|
|||
|
||||
#define SO_DETACH_REUSEPORT_BPF 68
|
||||
|
||||
#define SO_NETNS_COOKIE 71
|
||||
|
||||
#if !defined(__KERNEL__)
|
||||
|
||||
#if __BITS_PER_LONG == 64
|
||||
|
|
|
@ -16,21 +16,27 @@
|
|||
unsigned int
|
||||
ioread8(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned int ioread16(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned int ioread32(void __iomem *addr)
|
||||
{
|
||||
unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr);
|
||||
unsigned int ret;
|
||||
mb();
|
||||
ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -148,28 +154,36 @@ EXPORT_SYMBOL(__raw_writeq);
|
|||
|
||||
u8 readb(const volatile void __iomem *addr)
|
||||
{
|
||||
u8 ret = __raw_readb(addr);
|
||||
u8 ret;
|
||||
mb();
|
||||
ret = __raw_readb(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
u16 readw(const volatile void __iomem *addr)
|
||||
{
|
||||
u16 ret = __raw_readw(addr);
|
||||
u16 ret;
|
||||
mb();
|
||||
ret = __raw_readw(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
u32 readl(const volatile void __iomem *addr)
|
||||
{
|
||||
u32 ret = __raw_readl(addr);
|
||||
u32 ret;
|
||||
mb();
|
||||
ret = __raw_readl(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 readq(const volatile void __iomem *addr)
|
||||
{
|
||||
u64 ret = __raw_readq(addr);
|
||||
u64 ret;
|
||||
mb();
|
||||
ret = __raw_readq(addr);
|
||||
mb();
|
||||
return ret;
|
||||
}
|
||||
|
@ -207,6 +221,38 @@ EXPORT_SYMBOL(writew);
|
|||
EXPORT_SYMBOL(writel);
|
||||
EXPORT_SYMBOL(writeq);
|
||||
|
||||
/*
|
||||
* The _relaxed functions must be ordered w.r.t. each other, but they don't
|
||||
* have to be ordered w.r.t. other memory accesses.
|
||||
*/
|
||||
u8 readb_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readb(addr);
|
||||
}
|
||||
|
||||
u16 readw_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readw(addr);
|
||||
}
|
||||
|
||||
u32 readl_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readl(addr);
|
||||
}
|
||||
|
||||
u64 readq_relaxed(const volatile void __iomem *addr)
|
||||
{
|
||||
mb();
|
||||
return __raw_readq(addr);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(readb_relaxed);
|
||||
EXPORT_SYMBOL(readw_relaxed);
|
||||
EXPORT_SYMBOL(readl_relaxed);
|
||||
EXPORT_SYMBOL(readq_relaxed);
|
||||
|
||||
/*
|
||||
* Read COUNT 8-bit bytes from port PORT into memory starting at SRC.
|
||||
|
|
|
@ -90,16 +90,22 @@ libs-y += arch/arc/lib/ $(LIBGCC)
|
|||
|
||||
boot := arch/arc/boot
|
||||
|
||||
#default target for make without any arguments.
|
||||
KBUILD_IMAGE := $(boot)/bootpImage
|
||||
|
||||
all: bootpImage
|
||||
bootpImage: vmlinux
|
||||
|
||||
boot_targets += uImage uImage.bin uImage.gz
|
||||
boot_targets := uImage.bin uImage.gz uImage.lzma
|
||||
|
||||
PHONY += $(boot_targets)
|
||||
$(boot_targets): vmlinux
|
||||
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
|
||||
|
||||
uimage-default-y := uImage.bin
|
||||
uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz
|
||||
uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma
|
||||
|
||||
PHONY += uImage
|
||||
uImage: $(uimage-default-y)
|
||||
@ln -sf $< $(boot)/uImage
|
||||
@$(kecho) ' Image $(boot)/uImage is ready'
|
||||
|
||||
CLEAN_FILES += $(boot)/uImage
|
||||
|
||||
archclean:
|
||||
$(Q)$(MAKE) $(clean)=$(boot)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
targets := vmlinux.bin vmlinux.bin.gz uImage
|
||||
targets := vmlinux.bin vmlinux.bin.gz
|
||||
|
||||
# uImage build relies on mkimage being availble on your host for ARC target
|
||||
# You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
|
||||
|
@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \
|
|||
UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE)
|
||||
UIMAGE_ENTRYADDR = $(LINUX_START_TEXT)
|
||||
|
||||
suffix-y := bin
|
||||
suffix-$(CONFIG_KERNEL_GZIP) := gz
|
||||
suffix-$(CONFIG_KERNEL_LZMA) := lzma
|
||||
|
||||
targets += uImage
|
||||
targets += uImage.bin
|
||||
targets += uImage.gz
|
||||
targets += uImage.lzma
|
||||
|
@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
|
|||
|
||||
$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
|
||||
$(call if_changed,uimage,lzma)
|
||||
|
||||
$(obj)/uImage: $(obj)/uImage.$(suffix-y)
|
||||
@ln -sf $(notdir $<) $@
|
||||
@echo ' Image $@ is ready'
|
||||
|
|
|
@ -85,7 +85,7 @@
|
|||
* avoid duplicating the MB dtsi file given that IRQ from
|
||||
* this intc to cpu intc are different for axs101 and axs103
|
||||
*/
|
||||
mb_intc: dw-apb-ictl@e0012000 {
|
||||
mb_intc: interrupt-controller@e0012000 {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "snps,dw-apb-ictl";
|
||||
reg = < 0x0 0xe0012000 0x0 0x200 >;
|
||||
|
|
|
@ -129,7 +129,7 @@
|
|||
* avoid duplicating the MB dtsi file given that IRQ from
|
||||
* this intc to cpu intc are different for axs101 and axs103
|
||||
*/
|
||||
mb_intc: dw-apb-ictl@e0012000 {
|
||||
mb_intc: interrupt-controller@e0012000 {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "snps,dw-apb-ictl";
|
||||
reg = < 0x0 0xe0012000 0x0 0x200 >;
|
||||
|
|
|
@ -135,7 +135,7 @@
|
|||
* avoid duplicating the MB dtsi file given that IRQ from
|
||||
* this intc to cpu intc are different for axs101 and axs103
|
||||
*/
|
||||
mb_intc: dw-apb-ictl@e0012000 {
|
||||
mb_intc: interrupt-controller@e0012000 {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "snps,dw-apb-ictl";
|
||||
reg = < 0x0 0xe0012000 0x0 0x200 >;
|
||||
|
|
|
@ -88,6 +88,8 @@
|
|||
|
||||
arcpct: pct {
|
||||
compatible = "snps,archs-pct";
|
||||
interrupt-parent = <&cpu_intc>;
|
||||
interrupts = <20>;
|
||||
};
|
||||
|
||||
/* TIMER0 with interrupt for clockevent */
|
||||
|
@ -208,7 +210,7 @@
|
|||
reg = <0x8000 0x2000>;
|
||||
interrupts = <10>;
|
||||
interrupt-names = "macirq";
|
||||
phy-mode = "rgmii";
|
||||
phy-mode = "rgmii-id";
|
||||
snps,pbl = <32>;
|
||||
snps,multicast-filter-bins = <256>;
|
||||
clocks = <&gmacclk>;
|
||||
|
@ -226,7 +228,7 @@
|
|||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "snps,dwmac-mdio";
|
||||
phy0: ethernet-phy@0 {
|
||||
phy0: ethernet-phy@0 { /* Micrel KSZ9031 */
|
||||
reg = <0>;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
};
|
||||
|
||||
mb_intc: dw-apb-ictl@e0012000 {
|
||||
mb_intc: interrupt-controller@e0012000 {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "snps,dw-apb-ictl";
|
||||
reg = < 0xe0012000 0x200 >;
|
||||
|
|
|
@ -54,7 +54,7 @@
|
|||
|
||||
};
|
||||
|
||||
mb_intc: dw-apb-ictl@e0012000 {
|
||||
mb_intc: interrupt-controller@e0012000 {
|
||||
#interrupt-cells = <1>;
|
||||
compatible = "snps,dw-apb-ictl";
|
||||
reg = < 0xe0012000 0x200 >;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#define R_ARC_32_PCREL 0x31
|
||||
|
||||
/*to set parameters in the core dumps */
|
||||
#define ELF_ARCH EM_ARCOMPACT
|
||||
#define ELF_ARCH EM_ARC_INUSE
|
||||
#define ELF_CLASS ELFCLASS32
|
||||
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#ifndef __ASSEMBLY__
|
||||
|
||||
#define clear_page(paddr) memset((paddr), 0, PAGE_SIZE)
|
||||
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
|
||||
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
|
||||
|
||||
struct vm_area_struct;
|
||||
|
|
|
@ -135,8 +135,10 @@
|
|||
|
||||
#ifdef CONFIG_ARC_HAS_PAE40
|
||||
#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS 40
|
||||
#else
|
||||
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE)
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS 32
|
||||
#endif
|
||||
|
||||
/**************************************************************************
|
||||
|
|
|
@ -153,7 +153,6 @@ END(EV_Extension)
|
|||
tracesys:
|
||||
; save EFA in case tracer wants the PC of traced task
|
||||
; using ERET won't work since next-PC has already committed
|
||||
lr r12, [efa]
|
||||
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11
|
||||
st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address
|
||||
|
||||
|
@ -196,15 +195,9 @@ tracesys_exit:
|
|||
; Breakpoint TRAP
|
||||
; ---------------------------------------------
|
||||
trap_with_param:
|
||||
|
||||
; stop_pc info by gdb needs this info
|
||||
lr r0, [efa]
|
||||
mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc
|
||||
mov r1, sp
|
||||
|
||||
; Now that we have read EFA, it is safe to do "fake" rtie
|
||||
; and get out of CPU exception mode
|
||||
FAKE_RET_FROM_EXCPN
|
||||
|
||||
; Save callee regs in case gdb wants to have a look
|
||||
; SP will grow up by size of CALLEE Reg-File
|
||||
; NOTE: clobbers r12
|
||||
|
@ -231,6 +224,10 @@ ENTRY(EV_Trap)
|
|||
|
||||
EXCEPTION_PROLOGUE
|
||||
|
||||
lr r12, [efa]
|
||||
|
||||
FAKE_RET_FROM_EXCPN
|
||||
|
||||
;============ TRAP 1 :breakpoints
|
||||
; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
|
||||
bmsk.f 0, r10, 7
|
||||
|
@ -238,9 +235,6 @@ ENTRY(EV_Trap)
|
|||
|
||||
;============ TRAP (no param): syscall top level
|
||||
|
||||
; First return from Exception to pure K mode (Exception/IRQs renabled)
|
||||
FAKE_RET_FROM_EXCPN
|
||||
|
||||
; If syscall tracing ongoing, invoke pre-post-hooks
|
||||
GET_CURR_THR_INFO_FLAGS r10
|
||||
btst r10, TIF_SYSCALL_TRACE
|
||||
|
|
|
@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
{
|
||||
struct arc_reg_pct_build pct_bcr;
|
||||
struct arc_reg_cc_build cc_bcr;
|
||||
int i, has_interrupts;
|
||||
int i, has_interrupts, irq = -1;
|
||||
int counter_size; /* in bits */
|
||||
|
||||
union cc_name {
|
||||
|
@ -638,22 +638,25 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
|
|||
};
|
||||
|
||||
if (has_interrupts) {
|
||||
int irq = platform_get_irq(pdev, 0);
|
||||
|
||||
if (irq < 0) {
|
||||
pr_err("Cannot get IRQ number for the platform\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
irq = platform_get_irq(pdev, 0);
|
||||
if (irq >= 0) {
|
||||
int ret;
|
||||
|
||||
arc_pmu->irq = irq;
|
||||
|
||||
/* intc map function ensures irq_set_percpu_devid() called */
|
||||
request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
|
||||
ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
|
||||
this_cpu_ptr(&arc_pmu_cpu));
|
||||
|
||||
if (!ret)
|
||||
on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
|
||||
else
|
||||
irq = -1;
|
||||
}
|
||||
|
||||
} else
|
||||
}
|
||||
|
||||
if (irq == -1)
|
||||
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
|
||||
|
||||
/*
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/clocksource.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/of_fdt.h>
|
||||
#include <linux/of.h>
|
||||
|
@ -409,12 +410,12 @@ static void arc_chk_core_config(void)
|
|||
if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
|
||||
panic("Linux built with incorrect DCCM Base address\n");
|
||||
|
||||
if (CONFIG_ARC_DCCM_SZ != cpu->dccm.sz)
|
||||
if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
|
||||
panic("Linux built with incorrect DCCM Size\n");
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_ICCM
|
||||
if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz)
|
||||
if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
|
||||
panic("Linux built with incorrect ICCM Size\n");
|
||||
#endif
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
|
|||
sizeof(sf->uc.uc_mcontext.regs.scratch));
|
||||
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
|
||||
|
||||
return err;
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
|
||||
|
@ -110,7 +110,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
|
|||
&(sf->uc.uc_mcontext.regs.scratch),
|
||||
sizeof(sf->uc.uc_mcontext.regs.scratch));
|
||||
if (err)
|
||||
return err;
|
||||
return -EFAULT;
|
||||
|
||||
set_current_blocked(&set);
|
||||
regs->bta = uregs.scratch.bta;
|
||||
|
|
|
@ -38,15 +38,15 @@
|
|||
|
||||
#ifdef CONFIG_ARC_DW2_UNWIND
|
||||
|
||||
static void seed_unwind_frame_info(struct task_struct *tsk,
|
||||
struct pt_regs *regs,
|
||||
static int
|
||||
seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
|
||||
struct unwind_frame_info *frame_info)
|
||||
{
|
||||
/*
|
||||
* synchronous unwinding (e.g. dump_stack)
|
||||
* - uses current values of SP and friends
|
||||
*/
|
||||
if (tsk == NULL && regs == NULL) {
|
||||
if (regs == NULL && (tsk == NULL || tsk == current)) {
|
||||
unsigned long fp, sp, blink, ret;
|
||||
frame_info->task = current;
|
||||
|
||||
|
@ -65,11 +65,15 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
|
|||
frame_info->call_frame = 0;
|
||||
} else if (regs == NULL) {
|
||||
/*
|
||||
* Asynchronous unwinding of sleeping task
|
||||
* - Gets SP etc from task's pt_regs (saved bottom of kernel
|
||||
* mode stack of task)
|
||||
* Asynchronous unwinding of a likely sleeping task
|
||||
* - first ensure it is actually sleeping
|
||||
* - if so, it will be in __switch_to, kernel mode SP of task
|
||||
* is safe-kept and BLINK at a well known location in there
|
||||
*/
|
||||
|
||||
if (tsk->state == TASK_RUNNING)
|
||||
return -1;
|
||||
|
||||
frame_info->task = tsk;
|
||||
|
||||
frame_info->regs.r27 = TSK_K_FP(tsk);
|
||||
|
@ -103,6 +107,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
|
|||
frame_info->regs.r63 = regs->ret;
|
||||
frame_info->call_frame = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -112,11 +118,12 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
|
|||
int (*consumer_fn) (unsigned int, void *), void *arg)
|
||||
{
|
||||
#ifdef CONFIG_ARC_DW2_UNWIND
|
||||
int ret = 0;
|
||||
int ret = 0, cnt = 0;
|
||||
unsigned int address;
|
||||
struct unwind_frame_info frame_info;
|
||||
|
||||
seed_unwind_frame_info(tsk, regs, &frame_info);
|
||||
if (seed_unwind_frame_info(tsk, regs, &frame_info))
|
||||
return 0;
|
||||
|
||||
while (1) {
|
||||
address = UNW_PC(&frame_info);
|
||||
|
@ -132,6 +139,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
|
|||
break;
|
||||
|
||||
frame_info.regs.r63 = frame_info.regs.r31;
|
||||
|
||||
if (cnt++ > 128) {
|
||||
printk("unwinder looping too long, aborting !\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return address; /* return the last address it saw */
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
menuconfig ARC_PLAT_EZNPS
|
||||
bool "\"EZchip\" ARC dev platform"
|
||||
depends on ISA_ARCOMPACT
|
||||
select CPU_BIG_ENDIAN
|
||||
select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
|
||||
select EZNPS_GIC
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C)
|
||||
#define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030)
|
||||
#define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080)
|
||||
#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088)
|
||||
#define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C)
|
||||
#define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300)
|
||||
|
||||
|
|
|
@ -8,5 +8,6 @@ menuconfig ARC_SOC_HSDK
|
|||
select ARC_HAS_ACCL_REGS
|
||||
select ARC_IRQ_NO_AUTOSAVE
|
||||
select CLK_HSDK
|
||||
select RESET_CONTROLLER
|
||||
select RESET_HSDK
|
||||
select HAVE_PCI
|
||||
|
|
|
@ -507,8 +507,10 @@ config ARCH_S3C24XX
|
|||
select HAVE_S3C2410_WATCHDOG if WATCHDOG
|
||||
select HAVE_S3C_RTC if RTC_CLASS
|
||||
select NEED_MACH_IO_H
|
||||
select S3C2410_WATCHDOG
|
||||
select SAMSUNG_ATAGS
|
||||
select USE_OF
|
||||
select WATCHDOG
|
||||
help
|
||||
Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443
|
||||
and S3C2450 SoCs based systems, such as the Simtec Electronics BAST
|
||||
|
|
|
@ -121,9 +121,9 @@ ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \
|
|||
asflags-y := -DZIMAGE
|
||||
|
||||
# Supply kernel BSS size to the decompressor via a linker symbol.
|
||||
KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
|
||||
sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
|
||||
-e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
|
||||
KBSS_SZ = $(shell echo $$(($$($(NM) $(obj)/../../../../vmlinux | \
|
||||
sed -n -e 's/^\([^ ]*\) [ABD] __bss_start$$/-0x\1/p' \
|
||||
-e 's/^\([^ ]*\) [ABD] __bss_stop$$/+0x\1/p') )) )
|
||||
LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
|
||||
# Supply ZRELADDR to the decompressor via a linker symbol.
|
||||
ifneq ($(CONFIG_AUTO_ZRELADDR),y)
|
||||
|
@ -165,7 +165,7 @@ $(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S
|
|||
# The .data section is already discarded by the linker script so no need
|
||||
# to bother about it here.
|
||||
check_for_bad_syms = \
|
||||
bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
|
||||
bad_syms=$$($(NM) $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
|
||||
[ -z "$$bad_syms" ] || \
|
||||
( echo "following symbols must have non local/private scope:" >&2; \
|
||||
echo "$$bad_syms" >&2; false )
|
||||
|
|
|
@ -1142,9 +1142,9 @@ __armv4_mmu_cache_off:
|
|||
__armv7_mmu_cache_off:
|
||||
mrc p15, 0, r0, c1, c0
|
||||
#ifdef CONFIG_MMU
|
||||
bic r0, r0, #0x000d
|
||||
bic r0, r0, #0x0005
|
||||
#else
|
||||
bic r0, r0, #0x000c
|
||||
bic r0, r0, #0x0004
|
||||
#endif
|
||||
mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
|
||||
mov r12, lr
|
||||
|
|
|
@ -43,7 +43,7 @@ SECTIONS
|
|||
}
|
||||
.table : ALIGN(4) {
|
||||
_table_start = .;
|
||||
LONG(ZIMAGE_MAGIC(2))
|
||||
LONG(ZIMAGE_MAGIC(4))
|
||||
LONG(ZIMAGE_MAGIC(0x5a534c4b))
|
||||
LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start))
|
||||
LONG(ZIMAGE_MAGIC(_kernel_bss_size))
|
||||
|
|
|
@ -56,7 +56,7 @@ trap 'rm -f "$XIPIMAGE.tmp"; exit 1' 1 2 3
|
|||
# substitute the data section by a compressed version
|
||||
$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
|
||||
$DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes |
|
||||
gzip -9 >> "$XIPIMAGE.tmp"
|
||||
$KGZIP -9 >> "$XIPIMAGE.tmp"
|
||||
|
||||
# replace kernel binary
|
||||
mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
|
||||
|
|
|
@ -88,7 +88,6 @@
|
|||
AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0)
|
||||
AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0)
|
||||
AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0)
|
||||
AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */
|
||||
>;
|
||||
};
|
||||
|
||||
|
|
|
@ -40,6 +40,9 @@
|
|||
ethernet1 = &cpsw_emac1;
|
||||
spi0 = &spi0;
|
||||
spi1 = &spi1;
|
||||
mmc0 = &mmc1;
|
||||
mmc1 = &mmc2;
|
||||
mmc2 = &mmc3;
|
||||
};
|
||||
|
||||
cpus {
|
||||
|
|
|
@ -1576,8 +1576,9 @@
|
|||
reg-names = "rev";
|
||||
ti,hwmods = "d_can0";
|
||||
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
|
||||
clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>;
|
||||
clock-names = "fck";
|
||||
clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>,
|
||||
<&dcan0_fck>;
|
||||
clock-names = "fck", "osc";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0x0 0xcc000 0x2000>;
|
||||
|
@ -1585,6 +1586,8 @@
|
|||
dcan0: can@0 {
|
||||
compatible = "ti,am4372-d_can", "ti,am3352-d_can";
|
||||
reg = <0x0 0x2000>;
|
||||
clocks = <&dcan0_fck>;
|
||||
clock-names = "fck";
|
||||
syscon-raminit = <&scm_conf 0x644 0>;
|
||||
interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
|
||||
status = "disabled";
|
||||
|
@ -1597,8 +1600,9 @@
|
|||
reg-names = "rev";
|
||||
ti,hwmods = "d_can1";
|
||||
/* Domains (P, C): per_pwrdm, l4ls_clkdm */
|
||||
clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>;
|
||||
clock-names = "fck";
|
||||
clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>,
|
||||
<&dcan1_fck>;
|
||||
clock-names = "fck", "osc";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0x0 0xd0000 0x2000>;
|
||||
|
@ -1606,6 +1610,8 @@
|
|||
dcan1: can@0 {
|
||||
compatible = "ti,am4372-d_can", "ti,am3352-d_can";
|
||||
reg = <0x0 0x2000>;
|
||||
clocks = <&dcan1_fck>;
|
||||
clock-name = "fck";
|
||||
syscon-raminit = <&scm_conf 0x644 1>;
|
||||
interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
|
||||
status = "disabled";
|
||||
|
|
|
@ -236,6 +236,7 @@
|
|||
status = "okay";
|
||||
compatible = "ethernet-phy-id0141.0DD1", "ethernet-phy-ieee802.3-c22";
|
||||
reg = <1>;
|
||||
marvell,reg-init = <3 18 0 0x4985>;
|
||||
|
||||
/* irq is connected to &pcawan pin 7 */
|
||||
};
|
||||
|
|
|
@ -70,6 +70,9 @@
|
|||
|
||||
system-leds {
|
||||
compatible = "gpio-leds";
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&helios_system_led_pins>;
|
||||
|
||||
status-led {
|
||||
label = "helios4:green:status";
|
||||
gpios = <&gpio0 24 GPIO_ACTIVE_LOW>;
|
||||
|
@ -86,6 +89,9 @@
|
|||
|
||||
io-leds {
|
||||
compatible = "gpio-leds";
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&helios_io_led_pins>;
|
||||
|
||||
sata1-led {
|
||||
label = "helios4:green:ata1";
|
||||
gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
|
||||
|
@ -121,11 +127,15 @@
|
|||
fan1: j10-pwm {
|
||||
compatible = "pwm-fan";
|
||||
pwms = <&gpio1 9 40000>; /* Target freq:25 kHz */
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&helios_fan1_pins>;
|
||||
};
|
||||
|
||||
fan2: j17-pwm {
|
||||
compatible = "pwm-fan";
|
||||
pwms = <&gpio1 23 40000>; /* Target freq:25 kHz */
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&helios_fan2_pins>;
|
||||
};
|
||||
|
||||
usb2_phy: usb2-phy {
|
||||
|
@ -291,16 +301,22 @@
|
|||
"mpp39", "mpp40";
|
||||
marvell,function = "sd0";
|
||||
};
|
||||
helios_led_pins: helios-led-pins {
|
||||
marvell,pins = "mpp24", "mpp25",
|
||||
"mpp49", "mpp50",
|
||||
helios_system_led_pins: helios-system-led-pins {
|
||||
marvell,pins = "mpp24", "mpp25";
|
||||
marvell,function = "gpio";
|
||||
};
|
||||
helios_io_led_pins: helios-io-led-pins {
|
||||
marvell,pins = "mpp49", "mpp50",
|
||||
"mpp52", "mpp53",
|
||||
"mpp54";
|
||||
marvell,function = "gpio";
|
||||
};
|
||||
helios_fan_pins: helios-fan-pins {
|
||||
marvell,pins = "mpp41", "mpp43",
|
||||
"mpp48", "mpp55";
|
||||
helios_fan1_pins: helios_fan1_pins {
|
||||
marvell,pins = "mpp41", "mpp43";
|
||||
marvell,function = "gpio";
|
||||
};
|
||||
helios_fan2_pins: helios_fan2_pins {
|
||||
marvell,pins = "mpp48", "mpp55";
|
||||
marvell,function = "gpio";
|
||||
};
|
||||
microsom_spi1_cs_pins: spi1-cs-pins {
|
||||
|
|
|
@ -339,7 +339,8 @@
|
|||
|
||||
comphy: phy@18300 {
|
||||
compatible = "marvell,armada-380-comphy";
|
||||
reg = <0x18300 0x100>;
|
||||
reg-names = "comphy", "conf";
|
||||
reg = <0x18300 0x100>, <0x18460 4>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
|
|
|
@ -266,11 +266,6 @@
|
|||
reg = <0x11000 0x100>;
|
||||
};
|
||||
|
||||
&i2c1 {
|
||||
compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c";
|
||||
reg = <0x11100 0x100>;
|
||||
};
|
||||
|
||||
&mpic {
|
||||
reg = <0x20a00 0x2d0>, <0x21070 0x58>;
|
||||
};
|
||||
|
|
|
@ -81,11 +81,6 @@
|
|||
status = "okay";
|
||||
};
|
||||
|
||||
&vuart {
|
||||
// VUART Host Console
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
&uart1 {
|
||||
// Host Console
|
||||
status = "okay";
|
||||
|
|
|
@ -22,9 +22,9 @@
|
|||
#size-cells = <1>;
|
||||
ranges;
|
||||
|
||||
vga_memory: framebuffer@7f000000 {
|
||||
vga_memory: framebuffer@9f000000 {
|
||||
no-map;
|
||||
reg = <0x7f000000 0x01000000>;
|
||||
reg = <0x9f000000 0x01000000>; /* 16M */
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -371,6 +371,7 @@
|
|||
compatible = "aspeed,ast2400-ibt-bmc";
|
||||
reg = <0xc0 0x18>;
|
||||
interrupts = <8>;
|
||||
clocks = <&syscon ASPEED_CLK_GATE_LCLK>;
|
||||
status = "disabled";
|
||||
};
|
||||
};
|
||||
|
|
|
@ -464,6 +464,7 @@
|
|||
compatible = "aspeed,ast2500-ibt-bmc";
|
||||
reg = <0xc0 0x18>;
|
||||
interrupts = <8>;
|
||||
clocks = <&syscon ASPEED_CLK_GATE_LCLK>;
|
||||
status = "disabled";
|
||||
};
|
||||
};
|
||||
|
|
|
@ -44,8 +44,8 @@
|
|||
pinctrl-0 = <&pinctrl_macb0_default>;
|
||||
phy-mode = "rmii";
|
||||
|
||||
ethernet-phy@0 {
|
||||
reg = <0x0>;
|
||||
ethernet-phy@7 {
|
||||
reg = <0x7>;
|
||||
interrupt-parent = <&pioA>;
|
||||
interrupts = <PIN_PD31 IRQ_TYPE_LEVEL_LOW>;
|
||||
pinctrl-names = "default";
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
|
||||
ahb {
|
||||
usb0: gadget@300000 {
|
||||
atmel,vbus-gpio = <&pioA PIN_PA27 GPIO_ACTIVE_HIGH>;
|
||||
atmel,vbus-gpio = <&pioA PIN_PB11 GPIO_ACTIVE_HIGH>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&pinctrl_usba_vbus>;
|
||||
status = "okay";
|
||||
|
@ -125,8 +125,6 @@
|
|||
bus-width = <8>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&pinctrl_sdmmc0_default>;
|
||||
non-removable;
|
||||
mmc-ddr-1_8v;
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
|
|
|
@ -242,6 +242,11 @@
|
|||
atmel,pins =
|
||||
<AT91_PIOE 9 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PE9, conflicts with A9 */
|
||||
};
|
||||
pinctrl_usb_default: usb_default {
|
||||
atmel,pins =
|
||||
<AT91_PIOE 3 AT91_PERIPH_GPIO AT91_PINCTRL_NONE
|
||||
AT91_PIOE 4 AT91_PERIPH_GPIO AT91_PINCTRL_NONE>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -259,6 +264,8 @@
|
|||
&pioE 3 GPIO_ACTIVE_LOW
|
||||
&pioE 4 GPIO_ACTIVE_LOW
|
||||
>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&pinctrl_usb_default>;
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue