Merge branch 'master'
This commit is contained in:
commit
6b995751c2
6
CREDITS
6
CREDITS
|
@ -3642,11 +3642,9 @@ S: Beaverton, OR 97005
|
|||
S: USA
|
||||
|
||||
N: Michal Wronski
|
||||
E: wrona@mat.uni.torun.pl
|
||||
W: http://www.mat.uni.torun.pl/~wrona
|
||||
E: Michal.Wronski@motorola.com
|
||||
D: POSIX message queues fs (with K. Benedyczak)
|
||||
S: ul. Teczowa 23/12
|
||||
S: 80-680 Gdansk-Sobieszewo
|
||||
S: Krakow
|
||||
S: Poland
|
||||
|
||||
N: Frank Xia
|
||||
|
|
|
@ -139,9 +139,14 @@ You'll probably want to upgrade.
|
|||
Ksymoops
|
||||
--------
|
||||
|
||||
If the unthinkable happens and your kernel oopses, you'll need a 2.4
|
||||
version of ksymoops to decode the report; see REPORTING-BUGS in the
|
||||
root of the Linux source for more information.
|
||||
If the unthinkable happens and your kernel oopses, you may need the
|
||||
ksymoops tool to decode it, but in most cases you don't.
|
||||
In the 2.6 kernel it is generally preferred to build the kernel with
|
||||
CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
|
||||
(this also produces better output than ksymoops).
|
||||
If for some reason your kernel is not build with CONFIG_KALLSYMS and
|
||||
you have no way to rebuild and reproduce the Oops with that option, then
|
||||
you can still decode that Oops with ksymoops.
|
||||
|
||||
Module-Init-Tools
|
||||
-----------------
|
||||
|
|
|
@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
|
|||
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
|
||||
procfs-guide.xml writing_usb_driver.xml \
|
||||
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
|
||||
gadget.xml libata.xml mtdnand.xml librs.xml
|
||||
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
|
||||
|
||||
###
|
||||
# The build process is as follows (targets):
|
||||
|
|
|
@ -306,7 +306,7 @@ an example.
|
|||
</para>
|
||||
<sect1><title>Journal Level</title>
|
||||
!Efs/jbd/journal.c
|
||||
!Efs/jbd/recovery.c
|
||||
!Ifs/jbd/recovery.c
|
||||
</sect1>
|
||||
<sect1><title>Transasction Level</title>
|
||||
!Efs/jbd/transaction.c
|
||||
|
|
|
@ -118,7 +118,7 @@ X!Ilib/string.c
|
|||
</sect1>
|
||||
<sect1><title>User Space Memory Access</title>
|
||||
!Iinclude/asm-i386/uaccess.h
|
||||
!Iarch/i386/lib/usercopy.c
|
||||
!Earch/i386/lib/usercopy.c
|
||||
</sect1>
|
||||
<sect1><title>More Memory Management Functions</title>
|
||||
!Iinclude/linux/rmap.h
|
||||
|
@ -174,7 +174,6 @@ X!Ilib/string.c
|
|||
<title>The Linux VFS</title>
|
||||
<sect1><title>The Filesystem types</title>
|
||||
!Iinclude/linux/fs.h
|
||||
!Einclude/linux/fs.h
|
||||
</sect1>
|
||||
<sect1><title>The Directory Cache</title>
|
||||
!Efs/dcache.c
|
||||
|
@ -266,7 +265,7 @@ X!Ekernel/module.c
|
|||
<chapter id="hardware">
|
||||
<title>Hardware Interfaces</title>
|
||||
<sect1><title>Interrupt Handling</title>
|
||||
!Ikernel/irq/manage.c
|
||||
!Ekernel/irq/manage.c
|
||||
</sect1>
|
||||
|
||||
<sect1><title>Resources Management</title>
|
||||
|
@ -501,7 +500,7 @@ KAO -->
|
|||
!Edrivers/video/modedb.c
|
||||
</sect1>
|
||||
<sect1><title>Frame Buffer Macintosh Video Mode Database</title>
|
||||
!Idrivers/video/macmodes.c
|
||||
!Edrivers/video/macmodes.c
|
||||
</sect1>
|
||||
<sect1><title>Frame Buffer Fonts</title>
|
||||
<para>
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
|
||||
<!ENTITY rapidio SYSTEM "rapidio.xml">
|
||||
]>
|
||||
|
||||
<book id="RapidIO-Guide">
|
||||
<bookinfo>
|
||||
<title>RapidIO Subsystem Guide</title>
|
||||
|
||||
<authorgroup>
|
||||
<author>
|
||||
<firstname>Matt</firstname>
|
||||
<surname>Porter</surname>
|
||||
<affiliation>
|
||||
<address>
|
||||
<email>mporter@kernel.crashing.org</email>
|
||||
<email>mporter@mvista.com</email>
|
||||
</address>
|
||||
</affiliation>
|
||||
</author>
|
||||
</authorgroup>
|
||||
|
||||
<copyright>
|
||||
<year>2005</year>
|
||||
<holder>MontaVista Software, Inc.</holder>
|
||||
</copyright>
|
||||
|
||||
<legalnotice>
|
||||
<para>
|
||||
This documentation is free software; you can redistribute
|
||||
it and/or modify it under the terms of the GNU General Public
|
||||
License version 2 as published by the Free Software Foundation.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
This program is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied
|
||||
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
You should have received a copy of the GNU General Public
|
||||
License along with this program; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
MA 02111-1307 USA
|
||||
</para>
|
||||
|
||||
<para>
|
||||
For more details see the file COPYING in the source
|
||||
distribution of Linux.
|
||||
</para>
|
||||
</legalnotice>
|
||||
</bookinfo>
|
||||
|
||||
<toc></toc>
|
||||
|
||||
<chapter id="intro">
|
||||
<title>Introduction</title>
|
||||
<para>
|
||||
RapidIO is a high speed switched fabric interconnect with
|
||||
features aimed at the embedded market. RapidIO provides
|
||||
support for memory-mapped I/O as well as message-based
|
||||
transactions over the switched fabric network. RapidIO has
|
||||
a standardized discovery mechanism not unlike the PCI bus
|
||||
standard that allows simple detection of devices in a
|
||||
network.
|
||||
</para>
|
||||
<para>
|
||||
This documentation is provided for developers intending
|
||||
to support RapidIO on new architectures, write new drivers,
|
||||
or to understand the subsystem internals.
|
||||
</para>
|
||||
</chapter>
|
||||
|
||||
<chapter id="bugs">
|
||||
<title>Known Bugs and Limitations</title>
|
||||
|
||||
<sect1>
|
||||
<title>Bugs</title>
|
||||
<para>None. ;)</para>
|
||||
</sect1>
|
||||
<sect1>
|
||||
<title>Limitations</title>
|
||||
<para>
|
||||
<orderedlist>
|
||||
<listitem><para>Access/management of RapidIO memory regions is not supported</para></listitem>
|
||||
<listitem><para>Multiple host enumeration is not supported</para></listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="drivers">
|
||||
<title>RapidIO driver interface</title>
|
||||
<para>
|
||||
Drivers are provided a set of calls in order
|
||||
to interface with the subsystem to gather info
|
||||
on devices, request/map memory region resources,
|
||||
and manage mailboxes/doorbells.
|
||||
</para>
|
||||
<sect1>
|
||||
<title>Functions</title>
|
||||
!Iinclude/linux/rio_drv.h
|
||||
!Edrivers/rapidio/rio-driver.c
|
||||
!Edrivers/rapidio/rio.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="internals">
|
||||
<title>Internals</title>
|
||||
|
||||
<para>
|
||||
This chapter contains the autogenerated documentation of the RapidIO
|
||||
subsystem.
|
||||
</para>
|
||||
|
||||
<sect1><title>Structures</title>
|
||||
!Iinclude/linux/rio.h
|
||||
</sect1>
|
||||
<sect1><title>Enumeration and Discovery</title>
|
||||
!Idrivers/rapidio/rio-scan.c
|
||||
</sect1>
|
||||
<sect1><title>Driver functionality</title>
|
||||
!Idrivers/rapidio/rio.c
|
||||
!Idrivers/rapidio/rio-access.c
|
||||
</sect1>
|
||||
<sect1><title>Device model support</title>
|
||||
!Idrivers/rapidio/rio-driver.c
|
||||
</sect1>
|
||||
<sect1><title>Sysfs support</title>
|
||||
!Idrivers/rapidio/rio-sysfs.c
|
||||
</sect1>
|
||||
<sect1><title>PPC32 support</title>
|
||||
!Iarch/ppc/kernel/rio.c
|
||||
!Earch/ppc/syslib/ppc85xx_rio.c
|
||||
!Iarch/ppc/syslib/ppc85xx_rio.c
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
<chapter id="credits">
|
||||
<title>Credits</title>
|
||||
<para>
|
||||
The following people have contributed to the RapidIO
|
||||
subsystem directly or indirectly:
|
||||
<orderedlist>
|
||||
<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
|
||||
<listitem><para>Randy Vinson<email>rvinson@mvista.com</email></para></listitem>
|
||||
<listitem><para>Dan Malek<email>dan@embeddedalley.com</email></para></listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
<para>
|
||||
The following people have contributed to this document:
|
||||
<orderedlist>
|
||||
<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
</chapter>
|
||||
</book>
|
|
@ -10,14 +10,22 @@
|
|||
This guide describes the basics of Message Signaled Interrupts (MSI),
|
||||
the advantages of using MSI over traditional interrupt mechanisms,
|
||||
and how to enable your driver to use MSI or MSI-X. Also included is
|
||||
a Frequently Asked Questions.
|
||||
a Frequently Asked Questions (FAQ) section.
|
||||
|
||||
1.1 Terminology
|
||||
|
||||
PCI devices can be single-function or multi-function. In either case,
|
||||
when this text talks about enabling or disabling MSI on a "device
|
||||
function," it is referring to one specific PCI device and function and
|
||||
not to all functions on a PCI device (unless the PCI device has only
|
||||
one function).
|
||||
|
||||
2. Copyright 2003 Intel Corporation
|
||||
|
||||
3. What is MSI/MSI-X?
|
||||
|
||||
Message Signaled Interrupt (MSI), as described in the PCI Local Bus
|
||||
Specification Revision 2.3 or latest, is an optional feature, and a
|
||||
Specification Revision 2.3 or later, is an optional feature, and a
|
||||
required feature for PCI Express devices. MSI enables a device function
|
||||
to request service by sending an Inbound Memory Write on its PCI bus to
|
||||
the FSB as a Message Signal Interrupt transaction. Because MSI is
|
||||
|
@ -27,7 +35,7 @@ supported.
|
|||
|
||||
A PCI device that supports MSI must also support pin IRQ assertion
|
||||
interrupt mechanism to provide backward compatibility for systems that
|
||||
do not support MSI. In Systems, which support MSI, the bus driver is
|
||||
do not support MSI. In systems which support MSI, the bus driver is
|
||||
responsible for initializing the message address and message data of
|
||||
the device function's MSI/MSI-X capability structure during device
|
||||
initial configuration.
|
||||
|
@ -61,17 +69,17 @@ over the MSI capability structure as described below.
|
|||
|
||||
- MSI and MSI-X both support per-vector masking. Per-vector
|
||||
masking is an optional extension of MSI but a required
|
||||
feature for MSI-X. Per-vector masking provides the kernel
|
||||
the ability to mask/unmask MSI when servicing its software
|
||||
interrupt service routing handler. If per-vector masking is
|
||||
feature for MSI-X. Per-vector masking provides the kernel the
|
||||
ability to mask/unmask a single MSI while running its
|
||||
interrupt service routine. If per-vector masking is
|
||||
not supported, then the device driver should provide the
|
||||
hardware/software synchronization to ensure that the device
|
||||
generates MSI when the driver wants it to do so.
|
||||
|
||||
4. Why use MSI?
|
||||
|
||||
As a benefit the simplification of board design, MSI allows board
|
||||
designers to remove out of band interrupt routing. MSI is another
|
||||
As a benefit to the simplification of board design, MSI allows board
|
||||
designers to remove out-of-band interrupt routing. MSI is another
|
||||
step towards a legacy-free environment.
|
||||
|
||||
Due to increasing pressure on chipset and processor packages to
|
||||
|
@ -87,7 +95,7 @@ support. As a result, the PCI Express technology requires MSI
|
|||
support for better interrupt performance.
|
||||
|
||||
Using MSI enables the device functions to support two or more
|
||||
vectors, which can be configured to target different CPU's to
|
||||
vectors, which can be configured to target different CPUs to
|
||||
increase scalability.
|
||||
|
||||
5. Configuring a driver to use MSI/MSI-X
|
||||
|
@ -119,13 +127,13 @@ pci_enable_msi() explicitly.
|
|||
|
||||
int pci_enable_msi(struct pci_dev *dev)
|
||||
|
||||
With this new API, any existing device driver, which like to have
|
||||
MSI enabled on its device function, must call this API to enable MSI
|
||||
With this new API, a device driver that wants to have MSI
|
||||
enabled on its device function must call this API to enable MSI.
|
||||
A successful call will initialize the MSI capability structure
|
||||
with ONE vector, regardless of whether a device function is
|
||||
capable of supporting multiple messages. This vector replaces the
|
||||
pre-assigned dev->irq with a new MSI vector. To avoid the conflict
|
||||
of new assigned vector with existing pre-assigned vector requires
|
||||
pre-assigned dev->irq with a new MSI vector. To avoid a conflict
|
||||
of the new assigned vector with existing pre-assigned vector requires
|
||||
a device driver to call this API before calling request_irq().
|
||||
|
||||
5.2.2 API pci_disable_msi
|
||||
|
@ -137,14 +145,14 @@ when a device driver is unloading. This API restores dev->irq with
|
|||
the pre-assigned IOAPIC vector and switches a device's interrupt
|
||||
mode to PCI pin-irq assertion/INTx emulation mode.
|
||||
|
||||
Note that a device driver should always call free_irq() on MSI vector
|
||||
it has done request_irq() on before calling this API. Failure to do
|
||||
so results a BUG_ON() and a device will be left with MSI enabled and
|
||||
Note that a device driver should always call free_irq() on the MSI vector
|
||||
that it has done request_irq() on before calling this API. Failure to do
|
||||
so results in a BUG_ON() and a device will be left with MSI enabled and
|
||||
leaks its vector.
|
||||
|
||||
5.2.3 MSI mode vs. legacy mode diagram
|
||||
|
||||
The below diagram shows the events, which switches the interrupt
|
||||
The below diagram shows the events which switch the interrupt
|
||||
mode on the MSI-capable device function between MSI mode and
|
||||
PIN-IRQ assertion mode.
|
||||
|
||||
|
@ -155,9 +163,9 @@ PIN-IRQ assertion mode.
|
|||
------------ pci_disable_msi ------------------------
|
||||
|
||||
|
||||
Figure 1.0 MSI Mode vs. Legacy Mode
|
||||
Figure 1. MSI Mode vs. Legacy Mode
|
||||
|
||||
In Figure 1.0, a device operates by default in legacy mode. Legacy
|
||||
In Figure 1, a device operates by default in legacy mode. Legacy
|
||||
in this context means PCI pin-irq assertion or PCI-Express INTx
|
||||
emulation. A successful MSI request (using pci_enable_msi()) switches
|
||||
a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
|
||||
|
@ -166,11 +174,11 @@ assigned MSI vector will replace dev->irq.
|
|||
|
||||
To return back to its default mode, a device driver should always call
|
||||
pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
|
||||
device driver should always call free_irq() on MSI vector it has done
|
||||
request_irq() on before calling pci_disable_msi(). Failure to do so
|
||||
results a BUG_ON() and a device will be left with MSI enabled and
|
||||
device driver should always call free_irq() on the MSI vector it has
|
||||
done request_irq() on before calling pci_disable_msi(). Failure to do
|
||||
so results in a BUG_ON() and a device will be left with MSI enabled and
|
||||
leaks its vector. Otherwise, the PCI subsystem restores a device's
|
||||
dev->irq with a pre-assigned IOAPIC vector and marks released
|
||||
dev->irq with a pre-assigned IOAPIC vector and marks the released
|
||||
MSI vector as unused.
|
||||
|
||||
Once being marked as unused, there is no guarantee that the PCI
|
||||
|
@ -178,8 +186,8 @@ subsystem will reserve this MSI vector for a device. Depending on
|
|||
the availability of current PCI vector resources and the number of
|
||||
MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
|
||||
|
||||
For the case where the PCI subsystem re-assigned this MSI vector
|
||||
another driver, a request to switching back to MSI mode may result
|
||||
For the case where the PCI subsystem re-assigns this MSI vector to
|
||||
another driver, a request to switch back to MSI mode may result
|
||||
in being assigned a different MSI vector or a failure if no more
|
||||
vectors are available.
|
||||
|
||||
|
@ -208,12 +216,12 @@ Unlike the function pci_enable_msi(), the function pci_enable_msix()
|
|||
does not replace the pre-assigned IOAPIC dev->irq with a new MSI
|
||||
vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
|
||||
into the field vector of each element contained in a second argument.
|
||||
Note that the pre-assigned IO-APIC dev->irq is valid only if the device
|
||||
operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt of
|
||||
Note that the pre-assigned IOAPIC dev->irq is valid only if the device
|
||||
operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
|
||||
using dev->irq by the device driver to request for interrupt service
|
||||
may result unpredictabe behavior.
|
||||
|
||||
For each MSI-X vector granted, a device driver is responsible to call
|
||||
For each MSI-X vector granted, a device driver is responsible for calling
|
||||
other functions like request_irq(), enable_irq(), etc. to enable
|
||||
this vector with its corresponding interrupt service handler. It is
|
||||
a device driver's choice to assign all vectors with the same
|
||||
|
@ -224,13 +232,13 @@ service handler.
|
|||
|
||||
The PCI 3.0 specification has implementation notes that MMIO address
|
||||
space for a device's MSI-X structure should be isolated so that the
|
||||
software system can set different page for controlling accesses to
|
||||
the MSI-X structure. The implementation of MSI patch requires the PCI
|
||||
software system can set different pages for controlling accesses to the
|
||||
MSI-X structure. The implementation of MSI support requires the PCI
|
||||
subsystem, not a device driver, to maintain full control of the MSI-X
|
||||
table/MSI-X PBA and MMIO address space of the MSI-X table/MSI-X PBA.
|
||||
A device driver is prohibited from requesting the MMIO address space
|
||||
of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem will fail
|
||||
enabling MSI-X on its hardware device when it calls the function
|
||||
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
|
||||
table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
|
||||
address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
|
||||
will fail enabling MSI-X on its hardware device when it calls the function
|
||||
pci_enable_msix().
|
||||
|
||||
5.3.2 Handling MSI-X allocation
|
||||
|
@ -274,9 +282,9 @@ For the case where fewer MSI-X vectors are allocated to a function
|
|||
than requested, the function pci_enable_msix() will return the
|
||||
maximum number of MSI-X vectors available to the caller. A device
|
||||
driver may re-send its request with fewer or equal vectors indicated
|
||||
in a return. For example, if a device driver requests 5 vectors, but
|
||||
the number of available vectors is 3 vectors, a value of 3 will be a
|
||||
return as a result of pci_enable_msix() call. A function could be
|
||||
in the return. For example, if a device driver requests 5 vectors, but
|
||||
the number of available vectors is 3 vectors, a value of 3 will be
|
||||
returned as a result of pci_enable_msix() call. A function could be
|
||||
designed for its driver to use only 3 MSI-X table entries as
|
||||
different combinations as ABC--, A-B-C, A--CB, etc. Note that this
|
||||
patch does not support multiple entries with the same vector. Such
|
||||
|
@ -285,49 +293,46 @@ as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
|
|||
pci_enable_msix(). Below are the reasons why supporting multiple
|
||||
entries with the same vector is an undesirable solution.
|
||||
|
||||
- The PCI subsystem can not determine which entry, which
|
||||
generated the message, to mask/unmask MSI while handling
|
||||
- The PCI subsystem cannot determine the entry that
|
||||
generated the message to mask/unmask MSI while handling
|
||||
software driver ISR. Attempting to walk through all MSI-X
|
||||
table entries (2048 max) to mask/unmask any match vector
|
||||
is an undesirable solution.
|
||||
|
||||
- Walk through all MSI-X table entries (2048 max) to handle
|
||||
- Walking through all MSI-X table entries (2048 max) to handle
|
||||
SMP affinity of any match vector is an undesirable solution.
|
||||
|
||||
5.3.4 API pci_enable_msix
|
||||
|
||||
int pci_enable_msix(struct pci_dev *dev, u32 *entries, int nvec)
|
||||
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
|
||||
|
||||
This API enables a device driver to request the PCI subsystem
|
||||
for enabling MSI-X messages on its hardware device. Depending on
|
||||
to enable MSI-X messages on its hardware device. Depending on
|
||||
the availability of PCI vectors resources, the PCI subsystem enables
|
||||
either all or nothing.
|
||||
either all or none of the requested vectors.
|
||||
|
||||
Argument dev points to the device (pci_dev) structure.
|
||||
Argument 'dev' points to the device (pci_dev) structure.
|
||||
|
||||
Argument entries is a pointer of unsigned integer type. The number of
|
||||
elements is indicated in argument nvec. The content of each element
|
||||
will be mapped to the following struct defined in /driver/pci/msi.h.
|
||||
Argument 'entries' is a pointer to an array of msix_entry structs.
|
||||
The number of entries is indicated in argument 'nvec'.
|
||||
struct msix_entry is defined in /driver/pci/msi.h:
|
||||
|
||||
struct msix_entry {
|
||||
u16 vector; /* kernel uses to write alloc vector */
|
||||
u16 entry; /* driver uses to specify entry */
|
||||
};
|
||||
|
||||
A device driver is responsible for initializing the field entry of
|
||||
each element with unique entry supported by MSI-X table. Otherwise,
|
||||
A device driver is responsible for initializing the field 'entry' of
|
||||
each element with a unique entry supported by MSI-X table. Otherwise,
|
||||
-EINVAL will be returned as a result. A successful return of zero
|
||||
indicates the PCI subsystem completes initializing each of requested
|
||||
indicates the PCI subsystem completed initializing each of the requested
|
||||
entries of the MSI-X table with message address and message data.
|
||||
Last but not least, the PCI subsystem will write the 1:1
|
||||
vector-to-entry mapping into the field vector of each element. A
|
||||
device driver is responsible of keeping track of allocated MSI-X
|
||||
vector-to-entry mapping into the field 'vector' of each element. A
|
||||
device driver is responsible for keeping track of allocated MSI-X
|
||||
vectors in its internal data structure.
|
||||
|
||||
Argument nvec is an integer indicating the number of messages
|
||||
requested.
|
||||
|
||||
A return of zero indicates that the number of MSI-X vectors is
|
||||
A return of zero indicates that the number of MSI-X vectors was
|
||||
successfully allocated. A return of greater than zero indicates
|
||||
MSI-X vector shortage. Or a return of less than zero indicates
|
||||
a failure. This failure may be a result of duplicate entries
|
||||
|
@ -341,12 +346,12 @@ void pci_disable_msix(struct pci_dev *dev)
|
|||
This API should always be used to undo the effect of pci_enable_msix()
|
||||
when a device driver is unloading. Note that a device driver should
|
||||
always call free_irq() on all MSI-X vectors it has done request_irq()
|
||||
on before calling this API. Failure to do so results a BUG_ON() and
|
||||
on before calling this API. Failure to do so results in a BUG_ON() and
|
||||
a device will be left with MSI-X enabled and leaks its vectors.
|
||||
|
||||
5.3.6 MSI-X mode vs. legacy mode diagram
|
||||
|
||||
The below diagram shows the events, which switches the interrupt
|
||||
The below diagram shows the events which switch the interrupt
|
||||
mode on the MSI-X capable device function between MSI-X mode and
|
||||
PIN-IRQ assertion mode (legacy).
|
||||
|
||||
|
@ -356,22 +361,22 @@ PIN-IRQ assertion mode (legacy).
|
|||
| | ===============> | |
|
||||
------------ pci_disable_msix ------------------------
|
||||
|
||||
Figure 2.0 MSI-X Mode vs. Legacy Mode
|
||||
Figure 2. MSI-X Mode vs. Legacy Mode
|
||||
|
||||
In Figure 2.0, a device operates by default in legacy mode. A
|
||||
In Figure 2, a device operates by default in legacy mode. A
|
||||
successful MSI-X request (using pci_enable_msix()) switches a
|
||||
device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
|
||||
stored in dev->irq will be saved by the PCI subsystem; however,
|
||||
unlike MSI mode, the PCI subsystem will not replace dev->irq with
|
||||
assigned MSI-X vector because the PCI subsystem already writes the 1:1
|
||||
vector-to-entry mapping into the field vector of each element
|
||||
vector-to-entry mapping into the field 'vector' of each element
|
||||
specified in second argument.
|
||||
|
||||
To return back to its default mode, a device driver should always call
|
||||
pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
|
||||
a device driver should always call free_irq() on all MSI-X vectors it
|
||||
has done request_irq() on before calling pci_disable_msix(). Failure
|
||||
to do so results a BUG_ON() and a device will be left with MSI-X
|
||||
to do so results in a BUG_ON() and a device will be left with MSI-X
|
||||
enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
|
||||
device function's interrupt mode from MSI-X mode to legacy mode and
|
||||
marks all allocated MSI-X vectors as unused.
|
||||
|
@ -383,53 +388,56 @@ MSI/MSI-X requests from other drivers, these MSI-X vectors may be
|
|||
re-assigned.
|
||||
|
||||
For the case where the PCI subsystem re-assigned these MSI-X vectors
|
||||
to other driver, a request to switching back to MSI-X mode may result
|
||||
to other drivers, a request to switch back to MSI-X mode may result
|
||||
being assigned with another set of MSI-X vectors or a failure if no
|
||||
more vectors are available.
|
||||
|
||||
5.4 Handling function implementng both MSI and MSI-X capabilities
|
||||
5.4 Handling function implementing both MSI and MSI-X capabilities
|
||||
|
||||
For the case where a function implements both MSI and MSI-X
|
||||
capabilities, the PCI subsystem enables a device to run either in MSI
|
||||
mode or MSI-X mode but not both. A device driver determines whether it
|
||||
wants MSI or MSI-X enabled on its hardware device. Once a device
|
||||
driver requests for MSI, for example, it is prohibited to request for
|
||||
driver requests for MSI, for example, it is prohibited from requesting
|
||||
MSI-X; in other words, a device driver is not permitted to ping-pong
|
||||
between MSI mod MSI-X mode during a run-time.
|
||||
|
||||
5.5 Hardware requirements for MSI/MSI-X support
|
||||
|
||||
MSI/MSI-X support requires support from both system hardware and
|
||||
individual hardware device functions.
|
||||
|
||||
5.5.1 System hardware support
|
||||
|
||||
Since the target of MSI address is the local APIC CPU, enabling
|
||||
MSI/MSI-X support in Linux kernel is dependent on whether existing
|
||||
system hardware supports local APIC. Users should verify their
|
||||
system whether it runs when CONFIG_X86_LOCAL_APIC=y.
|
||||
MSI/MSI-X support in the Linux kernel is dependent on whether existing
|
||||
system hardware supports local APIC. Users should verify that their
|
||||
system supports local APIC operation by testing that it runs when
|
||||
CONFIG_X86_LOCAL_APIC=y.
|
||||
|
||||
In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
|
||||
however, in UP environment, users must manually set
|
||||
CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
|
||||
CONFIG_PCI_MSI enables the VECTOR based scheme and
|
||||
the option for MSI-capable device drivers to selectively enable
|
||||
MSI/MSI-X.
|
||||
CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
|
||||
MSI-capable device drivers to selectively enable MSI/MSI-X.
|
||||
|
||||
Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
|
||||
vector is allocated new during runtime and MSI/MSI-X support does not
|
||||
depend on BIOS support. This key independency enables MSI/MSI-X
|
||||
support on future IOxAPIC free platform.
|
||||
support on future IOxAPIC free platforms.
|
||||
|
||||
5.5.2 Device hardware support
|
||||
|
||||
The hardware device function supports MSI by indicating the
|
||||
MSI/MSI-X capability structure on its PCI capability list. By
|
||||
default, this capability structure will not be initialized by
|
||||
the kernel to enable MSI during the system boot. In other words,
|
||||
the device function is running on its default pin assertion mode.
|
||||
Note that in many cases the hardware supporting MSI have bugs,
|
||||
which may result in system hang. The software driver of specific
|
||||
MSI-capable hardware is responsible for whether calling
|
||||
which may result in system hangs. The software driver of specific
|
||||
MSI-capable hardware is responsible for deciding whether to call
|
||||
pci_enable_msi or not. A return of zero indicates the kernel
|
||||
successfully initializes the MSI/MSI-X capability structure of the
|
||||
successfully initialized the MSI/MSI-X capability structure of the
|
||||
device function. The device function is now running on MSI/MSI-X mode.
|
||||
|
||||
5.6 How to tell whether MSI/MSI-X is enabled on device function
|
||||
|
@ -439,10 +447,10 @@ pci_enable_msi()/pci_enable_msix() indicates to a device driver that
|
|||
its device function is initialized successfully and ready to run in
|
||||
MSI/MSI-X mode.
|
||||
|
||||
At the user level, users can use command 'cat /proc/interrupts'
|
||||
to display the vector allocated for a device and its interrupt
|
||||
MSI/MSI-X mode ("PCI MSI"/"PCI MSIX"). Below shows below MSI mode is
|
||||
enabled on a SCSI Adaptec 39320D Ultra320.
|
||||
At the user level, users can use the command 'cat /proc/interrupts'
|
||||
to display the vectors allocated for devices and their interrupt
|
||||
MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
|
||||
enabled on a SCSI Adaptec 39320D Ultra320 controller.
|
||||
|
||||
CPU0 CPU1
|
||||
0: 324639 0 IO-APIC-edge timer
|
||||
|
@ -453,8 +461,8 @@ enabled on a SCSI Adaptec 39320D Ultra320.
|
|||
15: 1 0 IO-APIC-edge ide1
|
||||
169: 0 0 IO-APIC-level uhci-hcd
|
||||
185: 0 0 IO-APIC-level uhci-hcd
|
||||
193: 138 10 PCI MSI aic79xx
|
||||
201: 30 0 PCI MSI aic79xx
|
||||
193: 138 10 PCI-MSI aic79xx
|
||||
201: 30 0 PCI-MSI aic79xx
|
||||
225: 30 0 IO-APIC-level aic7xxx
|
||||
233: 30 0 IO-APIC-level aic7xxx
|
||||
NMI: 0 0
|
||||
|
@ -490,8 +498,8 @@ target address set as 0xfeexxxxx, as conformed to PCI
|
|||
specification 2.3 or latest, then it should work.
|
||||
|
||||
Q4. From the driver point of view, if the MSI is lost because
|
||||
of the errors occur during inbound memory write, then it may
|
||||
wait for ever. Is there a mechanism for it to recover?
|
||||
of errors occurring during inbound memory write, then it may
|
||||
wait forever. Is there a mechanism for it to recover?
|
||||
|
||||
A4. Since the target of the transaction is an inbound memory
|
||||
write, all transaction termination conditions (Retry,
|
||||
|
|
|
@ -772,8 +772,6 @@ RCU pointer/list traversal:
|
|||
list_for_each_entry_rcu
|
||||
list_for_each_continue_rcu (to be deprecated in favor of new
|
||||
list_for_each_entry_continue_rcu)
|
||||
hlist_for_each_rcu (to be deprecated in favor of
|
||||
hlist_for_each_entry_rcu)
|
||||
hlist_for_each_entry_rcu
|
||||
|
||||
RCU pointer update:
|
||||
|
|
|
@ -19,7 +19,6 @@ There are two dm targets available: snapshot and snapshot-origin.
|
|||
*) snapshot-origin <origin>
|
||||
|
||||
which will normally have one or more snapshots based on it.
|
||||
You must create the snapshot-origin device before you can create snapshots.
|
||||
Reads will be mapped directly to the backing device. For each write, the
|
||||
original data will be saved in the <COW device> of each snapshot to keep
|
||||
its visible content unchanged, at least until the <COW device> fills up.
|
||||
|
@ -27,7 +26,7 @@ its visible content unchanged, at least until the <COW device> fills up.
|
|||
|
||||
*) snapshot <origin> <COW device> <persistent?> <chunksize>
|
||||
|
||||
A snapshot is created of the <origin> block device. Changed chunks of
|
||||
A snapshot of the <origin> block device is created. Changed chunks of
|
||||
<chunksize> sectors will be stored on the <COW device>. Writes will
|
||||
only go to the <COW device>. Reads will come from the <COW device> or
|
||||
from <origin> for unchanged data. <COW device> will often be
|
||||
|
@ -37,6 +36,8 @@ the amount of free space and expand the <COW device> before it fills up.
|
|||
|
||||
<persistent?> is P (Persistent) or N (Not persistent - will not survive
|
||||
after reboot).
|
||||
The difference is that for transient snapshots less metadata must be
|
||||
saved on disk - they can be kept in memory by the kernel.
|
||||
|
||||
|
||||
How this is used by LVM2
|
||||
|
|
|
@ -146,10 +146,10 @@ pmipal Use the protected mode interface for palette changes.
|
|||
|
||||
mtrr:n setup memory type range registers for the vesafb framebuffer
|
||||
where n:
|
||||
0 - disabled (equivalent to nomtrr)
|
||||
0 - disabled (equivalent to nomtrr) (default)
|
||||
1 - uncachable
|
||||
2 - write-back
|
||||
3 - write-combining (default)
|
||||
3 - write-combining
|
||||
4 - write-through
|
||||
|
||||
If you see the following in dmesg, choose the type that matches the
|
||||
|
|
|
@ -69,6 +69,22 @@ Who: Grant Coady <gcoady@gmail.com>
|
|||
|
||||
---------------------------
|
||||
|
||||
What: remove EXPORT_SYMBOL(panic_timeout)
|
||||
When: April 2006
|
||||
Files: kernel/panic.c
|
||||
Why: No modular usage in the kernel.
|
||||
Who: Adrian Bunk <bunk@stusta.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: remove EXPORT_SYMBOL(insert_resource)
|
||||
When: April 2006
|
||||
Files: kernel/resource.c
|
||||
Why: No modular usage in the kernel.
|
||||
Who: Adrian Bunk <bunk@stusta.de>
|
||||
|
||||
---------------------------
|
||||
|
||||
What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
|
||||
When: November 2005
|
||||
Files: drivers/pcmcia/: pcmcia_ioctl.c
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
RCU-based dcache locking model
|
||||
==============================
|
||||
|
||||
On many workloads, the most common operation on dcache is to look up a
|
||||
dentry, given a parent dentry and the name of the child. Typically,
|
||||
for every open(), stat() etc., the dentry corresponding to the
|
||||
pathname will be looked up by walking the tree starting with the first
|
||||
component of the pathname and using that dentry along with the next
|
||||
component to look up the next level and so on. Since it is a frequent
|
||||
operation for workloads like multiuser environments and web servers,
|
||||
it is important to optimize this path.
|
||||
|
||||
Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
|
||||
every component during path look-up. Since 2.5.10 onwards, fast-walk
|
||||
algorithm changed this by holding the dcache_lock at the beginning and
|
||||
walking as many cached path component dentries as possible. This
|
||||
significantly decreases the number of acquisitions of
|
||||
dcache_lock. However it also increases the lock hold time
|
||||
significantly and affects performance in large SMP machines. Since
|
||||
2.5.62 kernel, dcache has been using a new locking model that uses RCU
|
||||
to make dcache look-up lock-free.
|
||||
|
||||
The current dcache locking model is not very different from the
|
||||
existing dcache locking model. Prior to 2.5.62 kernel, dcache_lock
|
||||
protected the hash chain, d_child, d_alias, d_lru lists as well as
|
||||
d_inode and several other things like mount look-up. RCU-based changes
|
||||
affect only the way the hash chain is protected. For everything else
|
||||
the dcache_lock must be taken for both traversing as well as
|
||||
updating. The hash chain updates too take the dcache_lock. The
|
||||
significant change is the way d_lookup traverses the hash chain, it
|
||||
doesn't acquire the dcache_lock for this and relies on RCU to ensure
|
||||
that the dentry has not been *freed*.
|
||||
|
||||
|
||||
Dcache locking details
|
||||
======================
|
||||
|
||||
For many multi-user workloads, open() and stat() on files are very
|
||||
frequently occurring operations. Both involve walking of path names to
|
||||
find the dentry corresponding to the concerned file. In 2.4 kernel,
|
||||
dcache_lock was held during look-up of each path component. Contention
|
||||
and cache-line bouncing of this global lock caused significant
|
||||
scalability problems. With the introduction of RCU in Linux kernel,
|
||||
this was worked around by making the look-up of path components during
|
||||
path walking lock-free.
|
||||
|
||||
|
||||
Safe lock-free look-up of dcache hash table
|
||||
===========================================
|
||||
|
||||
Dcache is a complex data structure with the hash table entries also
|
||||
linked together in other lists. In 2.4 kernel, dcache_lock protected
|
||||
all the lists. We applied RCU only on hash chain walking. The rest of
|
||||
the lists are still protected by dcache_lock. Some of the important
|
||||
changes are :
|
||||
|
||||
1. The deletion from hash chain is done using hlist_del_rcu() macro
|
||||
which doesn't initialize the next pointer of the deleted dentry and
|
||||
this allows us to walk safely lock-free while a deletion is
|
||||
happening.
|
||||
|
||||
2. Insertion of a dentry into the hash table is done using
|
||||
hlist_add_head_rcu() which takes care of ordering the writes - the
|
||||
writes to the dentry must be visible before the dentry is
|
||||
inserted. This works in conjunction with hlist_for_each_rcu() while
|
||||
walking the hash chain. The only requirement is that all
|
||||
initialization to the dentry must be done before
|
||||
hlist_add_head_rcu() since we don't have dcache_lock protection
|
||||
while traversing the hash chain. This isn't different from the
|
||||
existing code.
|
||||
|
||||
3. The dentry looked up without holding dcache_lock cannot be
|
||||
returned for walking if it is unhashed. It then may have a NULL
|
||||
d_inode or other bogosity since RCU doesn't protect the other
|
||||
fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
|
||||
indicate unhashed dentries and use this in conjunction with a
|
||||
per-dentry lock (d_lock). Once looked up without the dcache_lock,
|
||||
we acquire the per-dentry lock (d_lock) and check if the dentry is
|
||||
unhashed. If so, the look-up fails. If not, the reference count
|
||||
of the dentry is increased and the dentry is returned.
|
||||
|
||||
4. Once a dentry is looked up, it must be ensured during the path walk
|
||||
for that component it doesn't go away. In pre-2.5.10 code, this was
|
||||
done holding a reference to the dentry. dcache_rcu does the same.
|
||||
In some sense, dcache_rcu path walking looks like the pre-2.5.10
|
||||
version.
|
||||
|
||||
5. All dentry hash chain updates must take the dcache_lock as well as
|
||||
the per-dentry lock in that order. dput() does this to ensure that
|
||||
a dentry that has just been looked up in another CPU doesn't get
|
||||
deleted before dget() can be done on it.
|
||||
|
||||
6. There are several ways to do reference counting of RCU protected
|
||||
objects. One such example is in ipv4 route cache where deferred
|
||||
freeing (using call_rcu()) is done as soon as the reference count
|
||||
goes to zero. This cannot be done in the case of dentries because
|
||||
tearing down of dentries requires blocking (dentry_iput()) which
|
||||
isn't supported from RCU callbacks. Instead, tearing down of
|
||||
dentries happen synchronously in dput(), but actual freeing happens
|
||||
later when RCU grace period is over. This allows safe lock-free
|
||||
walking of the hash chains, but a matched dentry may have been
|
||||
partially torn down. The checking of DCACHE_UNHASHED flag with
|
||||
d_lock held detects such dentries and prevents them from being
|
||||
returned from look-up.
|
||||
|
||||
|
||||
Maintaining POSIX rename semantics
|
||||
==================================
|
||||
|
||||
Since look-up of dentries is lock-free, it can race against a
|
||||
concurrent rename operation. For example, during rename of file A to
|
||||
B, look-up of either A or B must succeed. So, if look-up of B happens
|
||||
after A has been removed from the hash chain but not added to the new
|
||||
hash chain, it may fail. Also, a comparison while the name is being
|
||||
written concurrently by a rename may result in false positive matches
|
||||
violating rename semantics. Issues related to race with rename are
|
||||
handled as described below :
|
||||
|
||||
1. Look-up can be done in two ways - d_lookup() which is safe from
|
||||
simultaneous renames and __d_lookup() which is not. If
|
||||
__d_lookup() fails, it must be followed up by a d_lookup() to
|
||||
correctly determine whether a dentry is in the hash table or
|
||||
not. d_lookup() protects look-ups using a sequence lock
|
||||
(rename_lock).
|
||||
|
||||
2. The name associated with a dentry (d_name) may be changed if a
|
||||
rename is allowed to happen simultaneously. To avoid memcmp() in
|
||||
__d_lookup() going out of bounds due to a rename and false positive
|
||||
comparison, the name comparison is done while holding the
|
||||
per-dentry lock. This prevents concurrent renames during this
|
||||
operation.
|
||||
|
||||
3. Hash table walking during look-up may move to a different bucket as
|
||||
the current dentry is moved to a different bucket due to rename.
|
||||
But we use hlists in dcache hash table and they are
|
||||
null-terminated. So, even if a dentry moves to a different bucket,
|
||||
hash chain walk will terminate. [with a list_head list, it may not
|
||||
since termination is when the list_head in the original bucket is
|
||||
reached]. Since we redo the d_parent check and compare name while
|
||||
holding d_lock, lock-free look-up will not race against d_move().
|
||||
|
||||
4. There can be a theoretical race when a dentry keeps coming back to
|
||||
original bucket due to double moves. Due to this look-up may
|
||||
consider that it has never moved and can end up in an infinite loop.
|
||||
But this is not any worse than theoretical livelocks we already
|
||||
have in the kernel.
|
||||
|
||||
|
||||
Important guidelines for filesystem developers related to dcache_rcu
|
||||
====================================================================
|
||||
|
||||
1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
|
||||
don't change. Only dcache internal implementation changes. However
|
||||
filesystems *must not* delete from the dentry hash chains directly
|
||||
using the list macros like allowed earlier. They must use dcache
|
||||
APIs like d_drop() or __d_drop() depending on the situation.
|
||||
|
||||
2. d_flags is now protected by a per-dentry lock (d_lock). All access
|
||||
to d_flags must be protected by it.
|
||||
|
||||
3. For a hashed dentry, checking of d_count needs to be protected by
|
||||
d_lock.
|
||||
|
||||
|
||||
Papers and other documentation on dcache locking
|
||||
================================================
|
||||
|
||||
1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
|
||||
|
||||
2. http://lse.sourceforge.net/locking/dcache/dcache.html
|
||||
|
||||
|
||||
|
|
@ -1812,11 +1812,6 @@ it may overflow the messages buffer, but try to get as much of it as
|
|||
you can
|
||||
|
||||
|
||||
if you get an Oops, run ksymoops to decode it so that the
|
||||
names of the offending functions are provided. A non-decoded Oops is
|
||||
pretty useless
|
||||
|
||||
|
||||
send a copy of your devfsd configuration file(s)
|
||||
|
||||
send the bug report to me first.
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
ramfs, rootfs and initramfs
|
||||
October 17, 2005
|
||||
Rob Landley <rob@landley.net>
|
||||
=============================
|
||||
|
||||
What is ramfs?
|
||||
--------------
|
||||
|
||||
Ramfs is a very simple filesystem that exports Linux's disk caching
|
||||
mechanisms (the page cache and dentry cache) as a dynamically resizable
|
||||
ram-based filesystem.
|
||||
|
||||
Normally all files are cached in memory by Linux. Pages of data read from
|
||||
backing store (usually the block device the filesystem is mounted on) are kept
|
||||
around in case it's needed again, but marked as clean (freeable) in case the
|
||||
Virtual Memory system needs the memory for something else. Similarly, data
|
||||
written to files is marked clean as soon as it has been written to backing
|
||||
store, but kept around for caching purposes until the VM reallocates the
|
||||
memory. A similar mechanism (the dentry cache) greatly speeds up access to
|
||||
directories.
|
||||
|
||||
With ramfs, there is no backing store. Files written into ramfs allocate
|
||||
dentries and page cache as usual, but there's nowhere to write them to.
|
||||
This means the pages are never marked clean, so they can't be freed by the
|
||||
VM when it's looking to recycle memory.
|
||||
|
||||
The amount of code required to implement ramfs is tiny, because all the
|
||||
work is done by the existing Linux caching infrastructure. Basically,
|
||||
you're mounting the disk cache as a filesystem. Because of this, ramfs is not
|
||||
an optional component removable via menuconfig, since there would be negligible
|
||||
space savings.
|
||||
|
||||
ramfs and ramdisk:
|
||||
------------------
|
||||
|
||||
The older "ram disk" mechanism created a synthetic block device out of
|
||||
an area of ram and used it as backing store for a filesystem. This block
|
||||
device was of fixed size, so the filesystem mounted on it was of fixed
|
||||
size. Using a ram disk also required unnecessarily copying memory from the
|
||||
fake block device into the page cache (and copying changes back out), as well
|
||||
as creating and destroying dentries. Plus it needed a filesystem driver
|
||||
(such as ext2) to format and interpret this data.
|
||||
|
||||
Compared to ramfs, this wastes memory (and memory bus bandwidth), creates
|
||||
unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks
|
||||
to avoid this copying by playing with the page tables, but they're unpleasantly
|
||||
complicated and turn out to be about as expensive as the copying anyway.)
|
||||
More to the point, all the work ramfs is doing has to happen _anyway_,
|
||||
since all file access goes through the page and dentry caches. The ram
|
||||
disk is simply unnecessary, ramfs is internally much simpler.
|
||||
|
||||
Another reason ramdisks are semi-obsolete is that the introduction of
|
||||
loopback devices offered a more flexible and convenient way to create
|
||||
synthetic block devices, now from files instead of from chunks of memory.
|
||||
See losetup (8) for details.
|
||||
|
||||
ramfs and tmpfs:
|
||||
----------------
|
||||
|
||||
One downside of ramfs is you can keep writing data into it until you fill
|
||||
up all memory, and the VM can't free it because the VM thinks that files
|
||||
should get written to backing store (rather than swap space), but ramfs hasn't
|
||||
got any backing store. Because of this, only root (or a trusted user) should
|
||||
be allowed write access to a ramfs mount.
|
||||
|
||||
A ramfs derivative called tmpfs was created to add size limits, and the ability
|
||||
to write the data to swap space. Normal users can be allowed write access to
|
||||
tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
|
||||
|
||||
What is rootfs?
|
||||
---------------
|
||||
|
||||
Rootfs is a special instance of ramfs, which is always present in 2.6 systems.
|
||||
(It's used internally as the starting and stopping point for searches of the
|
||||
kernel's doubly-linked list of mount points.)
|
||||
|
||||
Most systems just mount another filesystem over it and ignore it. The
|
||||
amount of space an empty instance of ramfs takes up is tiny.
|
||||
|
||||
What is initramfs?
|
||||
------------------
|
||||
|
||||
All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is
|
||||
extracted into rootfs when the kernel boots up. After extracting, the kernel
|
||||
checks to see if rootfs contains a file "init", and if so it executes it as PID
|
||||
1. If found, this init process is responsible for bringing the system the
|
||||
rest of the way up, including locating and mounting the real root device (if
|
||||
any). If rootfs does not contain an init program after the embedded cpio
|
||||
archive is extracted into it, the kernel will fall through to the older code
|
||||
to locate and mount a root partition, then exec some variant of /sbin/init
|
||||
out of that.
|
||||
|
||||
All this differs from the old initrd in several ways:
|
||||
|
||||
- The old initrd was a separate file, while the initramfs archive is linked
|
||||
into the linux kernel image. (The directory linux-*/usr is devoted to
|
||||
generating this archive during the build.)
|
||||
|
||||
- The old initrd file was a gzipped filesystem image (in some file format,
|
||||
such as ext2, that had to be built into the kernel), while the new
|
||||
initramfs archive is a gzipped cpio archive (like tar only simpler,
|
||||
see cpio(1) and Documentation/early-userspace/buffer-format.txt).
|
||||
|
||||
- The program run by the old initrd (which was called /initrd, not /init) did
|
||||
some setup and then returned to the kernel, while the init program from
|
||||
initramfs is not expected to return to the kernel. (If /init needs to hand
|
||||
off control it can overmount / with a new root device and exec another init
|
||||
program. See the switch_root utility, below.)
|
||||
|
||||
- When switching another root device, initrd would pivot_root and then
|
||||
umount the ramdisk. But initramfs is rootfs: you can neither pivot_root
|
||||
rootfs, nor unmount it. Instead delete everything out of rootfs to
|
||||
free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs
|
||||
with the new root (cd /newmount; mount --move . /; chroot .), attach
|
||||
stdin/stdout/stderr to the new /dev/console, and exec the new init.
|
||||
|
||||
Since this is a remarkably persnickity process (and involves deleting
|
||||
commands before you can run them), the klibc package introduced a helper
|
||||
program (utils/run_init.c) to do all this for you. Most other packages
|
||||
(such as busybox) have named this command "switch_root".
|
||||
|
||||
Populating initramfs:
|
||||
---------------------
|
||||
|
||||
The 2.6 kernel build process always creates a gzipped cpio format initramfs
|
||||
archive and links it into the resulting kernel binary. By default, this
|
||||
archive is empty (consuming 134 bytes on x86). The config option
|
||||
CONFIG_INITRAMFS_SOURCE (for some reason buried under devices->block devices
|
||||
in menuconfig, and living in usr/Kconfig) can be used to specify a source for
|
||||
the initramfs archive, which will automatically be incorporated into the
|
||||
resulting binary. This option can point to an existing gzipped cpio archive, a
|
||||
directory containing files to be archived, or a text file specification such
|
||||
as the following example:
|
||||
|
||||
dir /dev 755 0 0
|
||||
nod /dev/console 644 0 0 c 5 1
|
||||
nod /dev/loop0 644 0 0 b 7 0
|
||||
dir /bin 755 1000 1000
|
||||
slink /bin/sh busybox 777 0 0
|
||||
file /bin/busybox initramfs/busybox 755 0 0
|
||||
dir /proc 755 0 0
|
||||
dir /sys 755 0 0
|
||||
dir /mnt 755 0 0
|
||||
file /init initramfs/init.sh 755 0 0
|
||||
|
||||
One advantage of the text file is that root access is not required to
|
||||
set permissions or create device nodes in the new archive. (Note that those
|
||||
two example "file" entries expect to find files named "init.sh" and "busybox" in
|
||||
a directory called "initramfs", under the linux-2.6.* directory. See
|
||||
Documentation/early-userspace/README for more details.)
|
||||
|
||||
If you don't already understand what shared libraries, devices, and paths
|
||||
you need to get a minimal root filesystem up and running, here are some
|
||||
references:
|
||||
http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
|
||||
http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
|
||||
http://www.linuxfromscratch.org/lfs/view/stable/
|
||||
|
||||
The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
|
||||
designed to be a tiny C library to statically link early userspace
|
||||
code against, along with some related utilities. It is BSD licensed.
|
||||
|
||||
I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
|
||||
myself. These are LGPL and GPL, respectively.
|
||||
|
||||
In theory you could use glibc, but that's not well suited for small embedded
|
||||
uses like this. (A "hello world" program statically linked against glibc is
|
||||
over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
|
||||
name lookups, even when otherwise statically linked.)
|
||||
|
||||
Future directions:
|
||||
------------------
|
||||
|
||||
Today (2.6.14), initramfs is always compiled in, but not always used. The
|
||||
kernel falls back to legacy boot code that is reached only if initramfs does
|
||||
not contain an /init program. The fallback is legacy code, there to ensure a
|
||||
smooth transition and allowing early boot functionality to gradually move to
|
||||
"early userspace" (I.E. initramfs).
|
||||
|
||||
The move to early userspace is necessary because finding and mounting the real
|
||||
root device is complex. Root partitions can span multiple devices (raid or
|
||||
separate journal). They can be out on the network (requiring dhcp, setting a
|
||||
specific mac address, logging into a server, etc). They can live on removable
|
||||
media, with dynamically allocated major/minor numbers and persistent naming
|
||||
issues requiring a full udev implementation to sort out. They can be
|
||||
compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned,
|
||||
and so on.
|
||||
|
||||
This kind of complexity (which inevitably includes policy) is rightly handled
|
||||
in userspace. Both klibc and busybox/uClibc are working on simple initramfs
|
||||
packages to drop into a kernel build, and when standard solutions are ready
|
||||
and widely deployed, the kernel's legacy early boot code will become obsolete
|
||||
and a candidate for the feature removal schedule.
|
||||
|
||||
But that's a while off yet.
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
Original author: Richard Gooch <rgooch@atnf.csiro.au>
|
||||
|
||||
Last updated on August 25, 2005
|
||||
Last updated on October 28, 2005
|
||||
|
||||
Copyright (C) 1999 Richard Gooch
|
||||
Copyright (C) 2005 Pekka Enberg
|
||||
|
@ -11,62 +11,61 @@
|
|||
This file is released under the GPLv2.
|
||||
|
||||
|
||||
What is it?
|
||||
===========
|
||||
Introduction
|
||||
============
|
||||
|
||||
The Virtual File System (otherwise known as the Virtual Filesystem
|
||||
Switch) is the software layer in the kernel that provides the
|
||||
filesystem interface to userspace programs. It also provides an
|
||||
abstraction within the kernel which allows different filesystem
|
||||
implementations to coexist.
|
||||
The Virtual File System (also known as the Virtual Filesystem Switch)
|
||||
is the software layer in the kernel that provides the filesystem
|
||||
interface to userspace programs. It also provides an abstraction
|
||||
within the kernel which allows different filesystem implementations to
|
||||
coexist.
|
||||
|
||||
VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
|
||||
on are called from a process context. Filesystem locking is described
|
||||
in the document Documentation/filesystems/Locking.
|
||||
|
||||
|
||||
A Quick Look At How It Works
|
||||
============================
|
||||
Directory Entry Cache (dcache)
|
||||
------------------------------
|
||||
|
||||
In this section I'll briefly describe how things work, before
|
||||
launching into the details. I'll start with describing what happens
|
||||
when user programs open and manipulate files, and then look from the
|
||||
other view which is how a filesystem is supported and subsequently
|
||||
mounted.
|
||||
The VFS implements the open(2), stat(2), chmod(2), and similar system
|
||||
calls. The pathname argument that is passed to them is used by the VFS
|
||||
to search through the directory entry cache (also known as the dentry
|
||||
cache or dcache). This provides a very fast look-up mechanism to
|
||||
translate a pathname (filename) into a specific dentry. Dentries live
|
||||
in RAM and are never saved to disc: they exist only for performance.
|
||||
|
||||
The dentry cache is meant to be a view into your entire filespace. As
|
||||
most computers cannot fit all dentries in the RAM at the same time,
|
||||
some bits of the cache are missing. In order to resolve your pathname
|
||||
into a dentry, the VFS may have to resort to creating dentries along
|
||||
the way, and then loading the inode. This is done by looking up the
|
||||
inode.
|
||||
|
||||
|
||||
Opening a File
|
||||
--------------
|
||||
The Inode Object
|
||||
----------------
|
||||
|
||||
The VFS implements the open(2), stat(2), chmod(2) and similar system
|
||||
calls. The pathname argument is used by the VFS to search through the
|
||||
directory entry cache (dentry cache or "dcache"). This provides a very
|
||||
fast look-up mechanism to translate a pathname (filename) into a
|
||||
specific dentry.
|
||||
An individual dentry usually has a pointer to an inode. Inodes are
|
||||
filesystem objects such as regular files, directories, FIFOs and other
|
||||
beasts. They live either on the disc (for block device filesystems)
|
||||
or in the memory (for pseudo filesystems). Inodes that live on the
|
||||
disc are copied into the memory when required and changes to the inode
|
||||
are written back to disc. A single inode can be pointed to by multiple
|
||||
dentries (hard links, for example, do this).
|
||||
|
||||
An individual dentry usually has a pointer to an inode. Inodes are the
|
||||
things that live on disc drives, and can be regular files (you know:
|
||||
those things that you write data into), directories, FIFOs and other
|
||||
beasts. Dentries live in RAM and are never saved to disc: they exist
|
||||
only for performance. Inodes live on disc and are copied into memory
|
||||
when required. Later any changes are written back to disc. The inode
|
||||
that lives in RAM is a VFS inode, and it is this which the dentry
|
||||
points to. A single inode can be pointed to by multiple dentries
|
||||
(think about hardlinks).
|
||||
To look up an inode requires that the VFS calls the lookup() method of
|
||||
the parent directory inode. This method is installed by the specific
|
||||
filesystem implementation that the inode lives in. Once the VFS has
|
||||
the required dentry (and hence the inode), we can do all those boring
|
||||
things like open(2) the file, or stat(2) it to peek at the inode
|
||||
data. The stat(2) operation is fairly simple: once the VFS has the
|
||||
dentry, it peeks at the inode data and passes some of it back to
|
||||
userspace.
|
||||
|
||||
The dcache is meant to be a view into your entire filespace. Unlike
|
||||
Linus, most of us losers can't fit enough dentries into RAM to cover
|
||||
all of our filespace, so the dcache has bits missing. In order to
|
||||
resolve your pathname into a dentry, the VFS may have to resort to
|
||||
creating dentries along the way, and then loading the inode. This is
|
||||
done by looking up the inode.
|
||||
|
||||
To look up an inode (usually read from disc) requires that the VFS
|
||||
calls the lookup() method of the parent directory inode. This method
|
||||
is installed by the specific filesystem implementation that the inode
|
||||
lives in. There will be more on this later.
|
||||
|
||||
Once the VFS has the required dentry (and hence the inode), we can do
|
||||
all those boring things like open(2) the file, or stat(2) it to peek
|
||||
at the inode data. The stat(2) operation is fairly simple: once the
|
||||
VFS has the dentry, it peeks at the inode data and passes some of it
|
||||
back to userspace.
|
||||
The File Object
|
||||
---------------
|
||||
|
||||
Opening a file requires another operation: allocation of a file
|
||||
structure (this is the kernel-side implementation of file
|
||||
|
@ -74,51 +73,39 @@ descriptors). The freshly allocated file structure is initialized with
|
|||
a pointer to the dentry and a set of file operation member functions.
|
||||
These are taken from the inode data. The open() file method is then
|
||||
called so the specific filesystem implementation can do its work. You
|
||||
can see that this is another switch performed by the VFS.
|
||||
|
||||
The file structure is placed into the file descriptor table for the
|
||||
process.
|
||||
can see that this is another switch performed by the VFS. The file
|
||||
structure is placed into the file descriptor table for the process.
|
||||
|
||||
Reading, writing and closing files (and other assorted VFS operations)
|
||||
is done by using the userspace file descriptor to grab the appropriate
|
||||
file structure, and then calling the required file structure method
|
||||
function to do whatever is required.
|
||||
|
||||
For as long as the file is open, it keeps the dentry "open" (in use),
|
||||
which in turn means that the VFS inode is still in use.
|
||||
|
||||
All VFS system calls (i.e. open(2), stat(2), read(2), write(2),
|
||||
chmod(2) and so on) are called from a process context. You should
|
||||
assume that these calls are made without any kernel locks being
|
||||
held. This means that the processes may be executing the same piece of
|
||||
filesystem or driver code at the same time, on different
|
||||
processors. You should ensure that access to shared resources is
|
||||
protected by appropriate locks.
|
||||
file structure, and then calling the required file structure method to
|
||||
do whatever is required. For as long as the file is open, it keeps the
|
||||
dentry in use, which in turn means that the VFS inode is still in use.
|
||||
|
||||
|
||||
Registering and Mounting a Filesystem
|
||||
-------------------------------------
|
||||
=====================================
|
||||
|
||||
If you want to support a new kind of filesystem in the kernel, all you
|
||||
need to do is call register_filesystem(). You pass a structure
|
||||
describing the filesystem implementation (struct file_system_type)
|
||||
which is then added to an internal table of supported filesystems. You
|
||||
can do:
|
||||
To register and unregister a filesystem, use the following API
|
||||
functions:
|
||||
|
||||
% cat /proc/filesystems
|
||||
#include <linux/fs.h>
|
||||
|
||||
to see what filesystems are currently available on your system.
|
||||
extern int register_filesystem(struct file_system_type *);
|
||||
extern int unregister_filesystem(struct file_system_type *);
|
||||
|
||||
When a request is made to mount a block device onto a directory in
|
||||
your filespace the VFS will call the appropriate method for the
|
||||
specific filesystem. The dentry for the mount point will then be
|
||||
updated to point to the root inode for the new filesystem.
|
||||
The passed struct file_system_type describes your filesystem. When a
|
||||
request is made to mount a device onto a directory in your filespace,
|
||||
the VFS will call the appropriate get_sb() method for the specific
|
||||
filesystem. The dentry for the mount point will then be updated to
|
||||
point to the root inode for the new filesystem.
|
||||
|
||||
It's now time to look at things in more detail.
|
||||
You can see all filesystems that are registered to the kernel in the
|
||||
file /proc/filesystems.
|
||||
|
||||
|
||||
struct file_system_type
|
||||
=======================
|
||||
-----------------------
|
||||
|
||||
This describes the filesystem. As of kernel 2.6.13, the following
|
||||
members are defined:
|
||||
|
@ -197,8 +184,14 @@ A fill_super() method implementation has the following arguments:
|
|||
int silent: whether or not to be silent on error
|
||||
|
||||
|
||||
The Superblock Object
|
||||
=====================
|
||||
|
||||
A superblock object represents a mounted filesystem.
|
||||
|
||||
|
||||
struct super_operations
|
||||
=======================
|
||||
-----------------------
|
||||
|
||||
This describes how the VFS can manipulate the superblock of your
|
||||
filesystem. As of kernel 2.6.13, the following members are defined:
|
||||
|
@ -286,9 +279,9 @@ or bottom half).
|
|||
a superblock. The second parameter indicates whether the method
|
||||
should wait until the write out has been completed. Optional.
|
||||
|
||||
write_super_lockfs: called when VFS is locking a filesystem and forcing
|
||||
it into a consistent state. This function is currently used by the
|
||||
Logical Volume Manager (LVM).
|
||||
write_super_lockfs: called when VFS is locking a filesystem and
|
||||
forcing it into a consistent state. This method is currently
|
||||
used by the Logical Volume Manager (LVM).
|
||||
|
||||
unlockfs: called when VFS is unlocking a filesystem and making it writable
|
||||
again.
|
||||
|
@ -317,8 +310,14 @@ field. This is a pointer to a "struct inode_operations" which
|
|||
describes the methods that can be performed on individual inodes.
|
||||
|
||||
|
||||
The Inode Object
|
||||
================
|
||||
|
||||
An inode object represents an object within the filesystem.
|
||||
|
||||
|
||||
struct inode_operations
|
||||
=======================
|
||||
-----------------------
|
||||
|
||||
This describes how the VFS can manipulate an inode in your
|
||||
filesystem. As of kernel 2.6.13, the following members are defined:
|
||||
|
@ -394,51 +393,62 @@ otherwise noted.
|
|||
will probably need to call d_instantiate() just as you would
|
||||
in the create() method
|
||||
|
||||
rename: called by the rename(2) system call to rename the object to
|
||||
have the parent and name given by the second inode and dentry.
|
||||
|
||||
readlink: called by the readlink(2) system call. Only required if
|
||||
you want to support reading symbolic links
|
||||
|
||||
follow_link: called by the VFS to follow a symbolic link to the
|
||||
inode it points to. Only required if you want to support
|
||||
symbolic links. This function returns a void pointer cookie
|
||||
symbolic links. This method returns a void pointer cookie
|
||||
that is passed to put_link().
|
||||
|
||||
put_link: called by the VFS to release resources allocated by
|
||||
follow_link(). The cookie returned by follow_link() is passed to
|
||||
to this function as the last parameter. It is used by filesystems
|
||||
such as NFS where page cache is not stable (i.e. page that was
|
||||
installed when the symbolic link walk started might not be in the
|
||||
page cache at the end of the walk).
|
||||
follow_link(). The cookie returned by follow_link() is passed
|
||||
to this method as the last parameter. It is used by
|
||||
filesystems such as NFS where page cache is not stable
|
||||
(i.e. page that was installed when the symbolic link walk
|
||||
started might not be in the page cache at the end of the
|
||||
walk).
|
||||
|
||||
truncate: called by the VFS to change the size of a file. The i_size
|
||||
field of the inode is set to the desired size by the VFS before
|
||||
this function is called. This function is called by the truncate(2)
|
||||
system call and related functionality.
|
||||
truncate: called by the VFS to change the size of a file. The
|
||||
i_size field of the inode is set to the desired size by the
|
||||
VFS before this method is called. This method is called by
|
||||
the truncate(2) system call and related functionality.
|
||||
|
||||
permission: called by the VFS to check for access rights on a POSIX-like
|
||||
filesystem.
|
||||
|
||||
setattr: called by the VFS to set attributes for a file. This function is
|
||||
called by chmod(2) and related system calls.
|
||||
setattr: called by the VFS to set attributes for a file. This method
|
||||
is called by chmod(2) and related system calls.
|
||||
|
||||
getattr: called by the VFS to get attributes of a file. This function is
|
||||
called by stat(2) and related system calls.
|
||||
getattr: called by the VFS to get attributes of a file. This method
|
||||
is called by stat(2) and related system calls.
|
||||
|
||||
setxattr: called by the VFS to set an extended attribute for a file.
|
||||
Extended attribute is a name:value pair associated with an inode. This
|
||||
function is called by setxattr(2) system call.
|
||||
Extended attribute is a name:value pair associated with an
|
||||
inode. This method is called by setxattr(2) system call.
|
||||
|
||||
getxattr: called by the VFS to retrieve the value of an extended attribute
|
||||
name. This function is called by getxattr(2) function call.
|
||||
getxattr: called by the VFS to retrieve the value of an extended
|
||||
attribute name. This method is called by getxattr(2) function
|
||||
call.
|
||||
|
||||
listxattr: called by the VFS to list all extended attributes for a given
|
||||
file. This function is called by listxattr(2) system call.
|
||||
listxattr: called by the VFS to list all extended attributes for a
|
||||
given file. This method is called by listxattr(2) system call.
|
||||
|
||||
removexattr: called by the VFS to remove an extended attribute from a file.
|
||||
This function is called by removexattr(2) system call.
|
||||
removexattr: called by the VFS to remove an extended attribute from
|
||||
a file. This method is called by removexattr(2) system call.
|
||||
|
||||
|
||||
The Address Space Object
|
||||
========================
|
||||
|
||||
The address space object is used to identify pages in the page cache.
|
||||
|
||||
|
||||
struct address_space_operations
|
||||
===============================
|
||||
-------------------------------
|
||||
|
||||
This describes how the VFS can manipulate mapping of a file to page cache in
|
||||
your filesystem. As of kernel 2.6.13, the following members are defined:
|
||||
|
@ -502,8 +512,14 @@ struct address_space_operations {
|
|||
it. An example implementation can be found in fs/ext2/xip.c.
|
||||
|
||||
|
||||
The File Object
|
||||
===============
|
||||
|
||||
A file object represents a file opened by a process.
|
||||
|
||||
|
||||
struct file_operations
|
||||
======================
|
||||
----------------------
|
||||
|
||||
This describes how the VFS can manipulate an open file. As of kernel
|
||||
2.6.13, the following members are defined:
|
||||
|
@ -661,7 +677,7 @@ of child dentries. Child dentries are basically like files in a
|
|||
directory.
|
||||
|
||||
|
||||
Directory Entry Cache APIs
|
||||
Directory Entry Cache API
|
||||
--------------------------
|
||||
|
||||
There are a number of functions defined which permit a filesystem to
|
||||
|
@ -705,178 +721,24 @@ manipulate dentries:
|
|||
and the dentry is returned. The caller must use d_put()
|
||||
to free the dentry when it finishes using it.
|
||||
|
||||
|
||||
RCU-based dcache locking model
|
||||
------------------------------
|
||||
|
||||
On many workloads, the most common operation on dcache is
|
||||
to look up a dentry, given a parent dentry and the name
|
||||
of the child. Typically, for every open(), stat() etc.,
|
||||
the dentry corresponding to the pathname will be looked
|
||||
up by walking the tree starting with the first component
|
||||
of the pathname and using that dentry along with the next
|
||||
component to look up the next level and so on. Since it
|
||||
is a frequent operation for workloads like multiuser
|
||||
environments and web servers, it is important to optimize
|
||||
this path.
|
||||
|
||||
Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus
|
||||
in every component during path look-up. Since 2.5.10 onwards,
|
||||
fast-walk algorithm changed this by holding the dcache_lock
|
||||
at the beginning and walking as many cached path component
|
||||
dentries as possible. This significantly decreases the number
|
||||
of acquisition of dcache_lock. However it also increases the
|
||||
lock hold time significantly and affects performance in large
|
||||
SMP machines. Since 2.5.62 kernel, dcache has been using
|
||||
a new locking model that uses RCU to make dcache look-up
|
||||
lock-free.
|
||||
|
||||
The current dcache locking model is not very different from the existing
|
||||
dcache locking model. Prior to 2.5.62 kernel, dcache_lock
|
||||
protected the hash chain, d_child, d_alias, d_lru lists as well
|
||||
as d_inode and several other things like mount look-up. RCU-based
|
||||
changes affect only the way the hash chain is protected. For everything
|
||||
else the dcache_lock must be taken for both traversing as well as
|
||||
updating. The hash chain updates too take the dcache_lock.
|
||||
The significant change is the way d_lookup traverses the hash chain,
|
||||
it doesn't acquire the dcache_lock for this and rely on RCU to
|
||||
ensure that the dentry has not been *freed*.
|
||||
For further information on dentry locking, please refer to the document
|
||||
Documentation/filesystems/dentry-locking.txt.
|
||||
|
||||
|
||||
Dcache locking details
|
||||
----------------------
|
||||
Resources
|
||||
=========
|
||||
|
||||
For many multi-user workloads, open() and stat() on files are
|
||||
very frequently occurring operations. Both involve walking
|
||||
of path names to find the dentry corresponding to the
|
||||
concerned file. In 2.4 kernel, dcache_lock was held
|
||||
during look-up of each path component. Contention and
|
||||
cache-line bouncing of this global lock caused significant
|
||||
scalability problems. With the introduction of RCU
|
||||
in Linux kernel, this was worked around by making
|
||||
the look-up of path components during path walking lock-free.
|
||||
(Note some of these resources are not up-to-date with the latest kernel
|
||||
version.)
|
||||
|
||||
Creating Linux virtual filesystems. 2002
|
||||
<http://lwn.net/Articles/13325/>
|
||||
|
||||
Safe lock-free look-up of dcache hash table
|
||||
===========================================
|
||||
The Linux Virtual File-system Layer by Neil Brown. 1999
|
||||
<http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
|
||||
|
||||
Dcache is a complex data structure with the hash table entries
|
||||
also linked together in other lists. In 2.4 kernel, dcache_lock
|
||||
protected all the lists. We applied RCU only on hash chain
|
||||
walking. The rest of the lists are still protected by dcache_lock.
|
||||
Some of the important changes are :
|
||||
A tour of the Linux VFS by Michael K. Johnson. 1996
|
||||
<http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
|
||||
|
||||
1. The deletion from hash chain is done using hlist_del_rcu() macro which
|
||||
doesn't initialize next pointer of the deleted dentry and this
|
||||
allows us to walk safely lock-free while a deletion is happening.
|
||||
|
||||
2. Insertion of a dentry into the hash table is done using
|
||||
hlist_add_head_rcu() which take care of ordering the writes -
|
||||
the writes to the dentry must be visible before the dentry
|
||||
is inserted. This works in conjunction with hlist_for_each_rcu()
|
||||
while walking the hash chain. The only requirement is that
|
||||
all initialization to the dentry must be done before hlist_add_head_rcu()
|
||||
since we don't have dcache_lock protection while traversing
|
||||
the hash chain. This isn't different from the existing code.
|
||||
|
||||
3. The dentry looked up without holding dcache_lock cannot be
|
||||
returned for walking if it is unhashed. It then may have a NULL
|
||||
d_inode or other bogosity since RCU doesn't protect the other
|
||||
fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
|
||||
indicate unhashed dentries and use this in conjunction with a
|
||||
per-dentry lock (d_lock). Once looked up without the dcache_lock,
|
||||
we acquire the per-dentry lock (d_lock) and check if the
|
||||
dentry is unhashed. If so, the look-up is failed. If not, the
|
||||
reference count of the dentry is increased and the dentry is returned.
|
||||
|
||||
4. Once a dentry is looked up, it must be ensured during the path
|
||||
walk for that component it doesn't go away. In pre-2.5.10 code,
|
||||
this was done holding a reference to the dentry. dcache_rcu does
|
||||
the same. In some sense, dcache_rcu path walking looks like
|
||||
the pre-2.5.10 version.
|
||||
|
||||
5. All dentry hash chain updates must take the dcache_lock as well as
|
||||
the per-dentry lock in that order. dput() does this to ensure
|
||||
that a dentry that has just been looked up in another CPU
|
||||
doesn't get deleted before dget() can be done on it.
|
||||
|
||||
6. There are several ways to do reference counting of RCU protected
|
||||
objects. One such example is in ipv4 route cache where
|
||||
deferred freeing (using call_rcu()) is done as soon as
|
||||
the reference count goes to zero. This cannot be done in
|
||||
the case of dentries because tearing down of dentries
|
||||
require blocking (dentry_iput()) which isn't supported from
|
||||
RCU callbacks. Instead, tearing down of dentries happen
|
||||
synchronously in dput(), but actual freeing happens later
|
||||
when RCU grace period is over. This allows safe lock-free
|
||||
walking of the hash chains, but a matched dentry may have
|
||||
been partially torn down. The checking of DCACHE_UNHASHED
|
||||
flag with d_lock held detects such dentries and prevents
|
||||
them from being returned from look-up.
|
||||
|
||||
|
||||
Maintaining POSIX rename semantics
|
||||
==================================
|
||||
|
||||
Since look-up of dentries is lock-free, it can race against
|
||||
a concurrent rename operation. For example, during rename
|
||||
of file A to B, look-up of either A or B must succeed.
|
||||
So, if look-up of B happens after A has been removed from the
|
||||
hash chain but not added to the new hash chain, it may fail.
|
||||
Also, a comparison while the name is being written concurrently
|
||||
by a rename may result in false positive matches violating
|
||||
rename semantics. Issues related to race with rename are
|
||||
handled as described below :
|
||||
|
||||
1. Look-up can be done in two ways - d_lookup() which is safe
|
||||
from simultaneous renames and __d_lookup() which is not.
|
||||
If __d_lookup() fails, it must be followed up by a d_lookup()
|
||||
to correctly determine whether a dentry is in the hash table
|
||||
or not. d_lookup() protects look-ups using a sequence
|
||||
lock (rename_lock).
|
||||
|
||||
2. The name associated with a dentry (d_name) may be changed if
|
||||
a rename is allowed to happen simultaneously. To avoid memcmp()
|
||||
in __d_lookup() go out of bounds due to a rename and false
|
||||
positive comparison, the name comparison is done while holding the
|
||||
per-dentry lock. This prevents concurrent renames during this
|
||||
operation.
|
||||
|
||||
3. Hash table walking during look-up may move to a different bucket as
|
||||
the current dentry is moved to a different bucket due to rename.
|
||||
But we use hlists in dcache hash table and they are null-terminated.
|
||||
So, even if a dentry moves to a different bucket, hash chain
|
||||
walk will terminate. [with a list_head list, it may not since
|
||||
termination is when the list_head in the original bucket is reached].
|
||||
Since we redo the d_parent check and compare name while holding
|
||||
d_lock, lock-free look-up will not race against d_move().
|
||||
|
||||
4. There can be a theoretical race when a dentry keeps coming back
|
||||
to original bucket due to double moves. Due to this look-up may
|
||||
consider that it has never moved and can end up in a infinite loop.
|
||||
But this is not any worse than theoretical livelocks we already
|
||||
have in the kernel.
|
||||
|
||||
|
||||
Important guidelines for filesystem developers related to dcache_rcu
|
||||
====================================================================
|
||||
|
||||
1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
|
||||
don't change. Only dcache internal implementation changes. However
|
||||
filesystems *must not* delete from the dentry hash chains directly
|
||||
using the list macros like allowed earlier. They must use dcache
|
||||
APIs like d_drop() or __d_drop() depending on the situation.
|
||||
|
||||
2. d_flags is now protected by a per-dentry lock (d_lock). All
|
||||
access to d_flags must be protected by it.
|
||||
|
||||
3. For a hashed dentry, checking of d_count needs to be protected
|
||||
by d_lock.
|
||||
|
||||
|
||||
Papers and other documentation on dcache locking
|
||||
================================================
|
||||
|
||||
1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
|
||||
|
||||
2. http://lse.sourceforge.net/locking/dcache/dcache.html
|
||||
A small trail through the Linux kernel by Andries Brouwer. 2001
|
||||
<http://www.win.tue.nl/~aeb/linux/vfs/trail.html>
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
High Precision Event Timer Driver for Linux
|
||||
|
||||
The High Precision Event Timer (HPET) hardware is the future replacement for the 8254 and Real
|
||||
Time Clock (RTC) periodic timer functionality. Each HPET can have up two 32 timers. It is possible
|
||||
to configure the first two timers as legacy replacements for 8254 and RTC periodic. A specification
|
||||
done by INTEL and Microsoft can be found at http://www.intel.com/labs/platcomp/hpet/hpetspec.htm.
|
||||
The High Precision Event Timer (HPET) hardware is the future replacement
|
||||
for the 8254 and Real Time Clock (RTC) periodic timer functionality.
|
||||
Each HPET can have up to 32 timers. It is possible to configure the
|
||||
first two timers as legacy replacements for 8254 and RTC periodic timers.
|
||||
A specification done by Intel and Microsoft can be found at
|
||||
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
|
||||
|
||||
The driver supports detection of HPET driver allocation and initialization of the HPET before the
|
||||
driver module_init routine is called. This enables platform code which uses timer 0 or 1 as the
|
||||
main timer to intercept HPET initialization. An example of this initialization can be found in
|
||||
The driver supports detection of HPET driver allocation and initialization
|
||||
of the HPET before the driver module_init routine is called. This enables
|
||||
platform code which uses timer 0 or 1 as the main timer to intercept HPET
|
||||
initialization. An example of this initialization can be found in
|
||||
arch/i386/kernel/time_hpet.c.
|
||||
|
||||
The driver provides two APIs which are very similar to the API found in the rtc.c driver.
|
||||
There is a user space API and a kernel space API. An example user space program is provided
|
||||
below.
|
||||
The driver provides two APIs which are very similar to the API found in
|
||||
the rtc.c driver. There is a user space API and a kernel space API.
|
||||
An example user space program is provided below.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -290,9 +293,8 @@ The kernel API has three interfaces exported from the driver:
|
|||
hpet_unregister(struct hpet_task *tp)
|
||||
hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
|
||||
|
||||
The kernel module using this interface fills in the ht_func and ht_data members of the
|
||||
hpet_task structure before calling hpet_register. hpet_control simply vectors to the hpet_ioctl
|
||||
routine and has the same commands and respective arguments as the user API. hpet_unregister
|
||||
The kernel module using this interface fills in the ht_func and ht_data
|
||||
members of the hpet_task structure before calling hpet_register.
|
||||
hpet_control simply vectors to the hpet_ioctl routine and has the same
|
||||
commands and respective arguments as the user API. hpet_unregister
|
||||
is used to terminate usage of the HPET timer reserved by hpet_register.
|
||||
|
||||
|
||||
|
|
|
@ -130,8 +130,6 @@ Code Seq# Include File Comments
|
|||
<mailto:zapman@interlan.net>
|
||||
'i' 00-3F linux/i2o.h
|
||||
'j' 00-3F linux/joystick.h
|
||||
'k' all asm-sparc/kbio.h
|
||||
asm-sparc64/kbio.h
|
||||
'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
|
||||
<http://mikonos.dia.unisa.it/tcfs>
|
||||
'l' 40-7F linux/udf_fs_i.h in development:
|
||||
|
|
|
@ -120,7 +120,7 @@ ISDN_NET_MAGIC 0x49344C02 isdn_net_local_s drivers/isdn/i4l/isdn_net_li
|
|||
SAVEKMSG_MAGIC2 0x4B4D5347 savekmsg arch/*/amiga/config.c
|
||||
STLI_BOARDMAGIC 0x4bc6c825 stlibrd include/linux/istallion.h
|
||||
CS_STATE_MAGIC 0x4c4f4749 cs_state sound/oss/cs46xx.c
|
||||
SLAB_C_MAGIC 0x4f17a36d kmem_cache_s mm/slab.c
|
||||
SLAB_C_MAGIC 0x4f17a36d kmem_cache mm/slab.c
|
||||
COW_MAGIC 0x4f4f4f4d cow_header_v1 arch/um/drivers/ubd_user.c
|
||||
I810_CARD_MAGIC 0x5072696E i810_card sound/oss/i810_audio.c
|
||||
TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
|
||||
|
|
|
@ -176,8 +176,6 @@ information (_most_ of which _is_ _essential_) includes:
|
|||
- Which client caused the problem ?
|
||||
- How much data was being transferred ?
|
||||
- Was the network congested ?
|
||||
- If there was a kernel panic, please run the output through ksymoops
|
||||
before sending it to me, otherwise its _useless_.
|
||||
- How can the problem be reproduced ?
|
||||
- Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
|
||||
tcpdump don't understand how to dump DECnet properly, so including
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
NOTE: ksymoops is useless on 2.6. Please use the Oops in its original format
|
||||
(from dmesg, etc). Ignore any references in this or other docs to "decoding
|
||||
the Oops" or "running it through ksymoops". If you post an Oops fron 2.6 that
|
||||
the Oops" or "running it through ksymoops". If you post an Oops from 2.6 that
|
||||
has been run through ksymoops, people will just tell you to repost it.
|
||||
|
||||
Quick Summary
|
||||
|
|
|
@ -11,9 +11,9 @@ boot video card. (Kernel usually does not even contain video card
|
|||
driver -- vesafb and vgacon are widely used).
|
||||
|
||||
This is not problem for swsusp, because during swsusp resume, BIOS is
|
||||
run normally so video card is normally initialized. S3 has absolutely
|
||||
no chance of working with SMP/HT. Be sure it to turn it off before
|
||||
testing (swsusp should work ok, OTOH).
|
||||
run normally so video card is normally initialized. It should not be
|
||||
problem for S1 standby, because hardware should retain its state over
|
||||
that.
|
||||
|
||||
There are a few types of systems where video works after S3 resume:
|
||||
|
||||
|
@ -64,7 +64,7 @@ your video card (good luck getting docs :-(). Maybe suspending from X
|
|||
(proper X, knowing your hardware, not XF68_FBcon) might have better
|
||||
chance of working.
|
||||
|
||||
Table of known working systems:
|
||||
Table of known working notebooks:
|
||||
|
||||
Model hack (or "how to do it")
|
||||
------------------------------------------------------------------------------
|
||||
|
@ -73,7 +73,7 @@ Acer TM 242FX vbetool (6)
|
|||
Acer TM C110 video_post (8)
|
||||
Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
|
||||
Acer TM 4052LCi s3_bios (2)
|
||||
Acer TM 636Lci s3_bios vga=normal (2)
|
||||
Acer TM 636Lci s3_bios,s3_mode (4)
|
||||
Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back
|
||||
Acer TM 660 ??? (*)
|
||||
Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6)
|
||||
|
@ -137,6 +137,13 @@ Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****)
|
|||
Toshiba M30 (2) xor X with nvidia driver using internal AGP
|
||||
Uniwill 244IIO ??? (*)
|
||||
|
||||
Known working desktop systems
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Mainboard Graphics card hack (or "how to do it")
|
||||
------------------------------------------------------------------------------
|
||||
Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
|
||||
|
||||
|
||||
(*) from http://www.ubuntulinux.org/wiki/HoaryPMResults, not sure
|
||||
which options to use. If you know, please tell me.
|
||||
|
|
|
@ -8,11 +8,10 @@ All devices which can be addressed by means of ccws are called 'CCW devices' -
|
|||
even if they aren't actually driven by ccws.
|
||||
|
||||
All ccw devices are accessed via a subchannel, this is reflected in the
|
||||
structures under root/:
|
||||
structures under devices/:
|
||||
|
||||
root/
|
||||
- sys
|
||||
- legacy
|
||||
devices/
|
||||
- system/
|
||||
- css0/
|
||||
- 0.0.0000/0.0.0815/
|
||||
- 0.0.0001/0.0.4711/
|
||||
|
@ -36,7 +35,7 @@ availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
|
|||
|
||||
online: An interface to set the device online and offline.
|
||||
In the special case of the device being disconnected (see the
|
||||
notify function under 1.2), piping 0 to online will focibly delete
|
||||
notify function under 1.2), piping 0 to online will forcibly delete
|
||||
the device.
|
||||
|
||||
The device drivers can add entries to export per-device data and interfaces.
|
||||
|
@ -222,7 +221,7 @@ and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
|
|||
Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
|
||||
only the logical state and not the physical state, since we cannot track the
|
||||
latter consistently due to lacking machine support (we don't need to be aware
|
||||
of anyway).
|
||||
of it anyway).
|
||||
|
||||
status - Can be 'online' or 'offline'.
|
||||
Piping 'on' or 'off' sets the chpid logically online/offline.
|
||||
|
@ -235,12 +234,16 @@ status - Can be 'online' or 'offline'.
|
|||
3. System devices
|
||||
-----------------
|
||||
|
||||
Note: cpus may yet be added here.
|
||||
|
||||
3.1 xpram
|
||||
---------
|
||||
|
||||
xpram shows up under sys/ as 'xpram'.
|
||||
xpram shows up under devices/system/ as 'xpram'.
|
||||
|
||||
3.2 cpus
|
||||
--------
|
||||
|
||||
For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
|
||||
attribute 'online' which can be 0 or 1.
|
||||
|
||||
|
||||
4. Other devices
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -167,7 +167,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
spdif - Support SPDIF I/O
|
||||
- Default: disabled
|
||||
|
||||
Module supports autoprobe and multiple chips (max 8).
|
||||
This module supports one chip and autoprobe.
|
||||
|
||||
The power-management is supported.
|
||||
|
||||
|
@ -206,7 +206,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
See "AC97 Quirk Option" section below.
|
||||
spdif_aclink - S/PDIF transfer over AC-link (default = 1)
|
||||
|
||||
This module supports up to 8 cards and autoprobe.
|
||||
This module supports one card and autoprobe.
|
||||
|
||||
ATI IXP has two different methods to control SPDIF output. One is
|
||||
over AC-link and another is over the "direct" SPDIF output. The
|
||||
|
@ -218,7 +218,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
|
||||
Module for ATI IXP 150/200/250 AC97 modem controllers.
|
||||
|
||||
Module supports up to 8 cards.
|
||||
This module supports one card and autoprobe.
|
||||
|
||||
Note: The default index value of this module is -2, i.e. the first
|
||||
slot is excluded.
|
||||
|
@ -637,7 +637,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
model - force the model name
|
||||
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
|
||||
|
||||
Module supports up to 8 cards.
|
||||
This module supports one card and autoprobe.
|
||||
|
||||
Each codec may have a model table for different configurations.
|
||||
If your machine isn't listed there, the default (usually minimal)
|
||||
|
@ -663,6 +663,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
adjusted. Appearing only when compiled with
|
||||
$CONFIG_SND_DEBUG=y
|
||||
|
||||
ALC260
|
||||
hp HP machines
|
||||
fujitsu Fujitsu S7020
|
||||
|
||||
CMI9880
|
||||
minimal 3-jack in back
|
||||
min_fp 3-jack in back, 2-jack in front
|
||||
|
@ -811,7 +815,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
semaphores (e.g. on some ASUS laptops)
|
||||
(default off)
|
||||
|
||||
Module supports autoprobe and multiple bus-master chips (max 8).
|
||||
This module supports one chip and autoprobe.
|
||||
|
||||
Note: the latest driver supports auto-detection of chip clock.
|
||||
if you still encounter too fast playback, specify the clock
|
||||
|
@ -830,7 +834,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
|
||||
ac97_clock - AC'97 codec clock base (0 = auto-detect)
|
||||
|
||||
This module supports up to 8 cards and autoprobe.
|
||||
This module supports one card and autoprobe.
|
||||
|
||||
Note: The default index value of this module is -2, i.e. the first
|
||||
slot is excluded.
|
||||
|
@ -950,8 +954,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
use_cache - 0 or 1 (disabled by default)
|
||||
vaio_hack - alias buffer_top=0x25a800
|
||||
reset_workaround - enable AC97 RESET workaround for some laptops
|
||||
reset_workaround2 - enable extended AC97 RESET workaround for some
|
||||
other laptops
|
||||
|
||||
Module supports autoprobe and multiple chips (max 8).
|
||||
This module supports one chip and autoprobe.
|
||||
|
||||
The power-management is supported.
|
||||
|
||||
|
@ -980,6 +986,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
workaround is enabled automatically. For other laptops with a
|
||||
hard freeze, you can try reset_workaround=1 option.
|
||||
|
||||
Note: Dell Latitude CSx laptops have another problem regarding
|
||||
AC97 RESET. On these laptops, reset_workaround2 option is
|
||||
turned on by default. This option is worth trying if the
|
||||
previous reset_workaround option doesn't help.
|
||||
|
||||
Note: This driver is really crappy. It's a porting from the
|
||||
OSS driver, which is a result of black-magic reverse engineering.
|
||||
The detection of codec will fail if the driver is loaded *after*
|
||||
|
@ -1310,7 +1321,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
ac97_quirk - AC'97 workaround for strange hardware
|
||||
See "AC97 Quirk Option" section below.
|
||||
|
||||
Module supports autoprobe and multiple bus-master chips (max 8).
|
||||
This module supports one chip and autoprobe.
|
||||
|
||||
Note: on some SMP motherboards like MSI 694D the interrupts might
|
||||
not be generated properly. In such a case, please try to
|
||||
|
@ -1352,7 +1363,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
|
|||
|
||||
ac97_clock - AC'97 codec clock base (default 48000Hz)
|
||||
|
||||
Module supports up to 8 cards.
|
||||
This module supports one card and autoprobe.
|
||||
|
||||
Note: The default index value of this module is -2, i.e. the first
|
||||
slot is excluded.
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
</affiliation>
|
||||
</author>
|
||||
|
||||
<date>March 6, 2005</date>
|
||||
<edition>0.3.4</edition>
|
||||
<date>October 6, 2005</date>
|
||||
<edition>0.3.5</edition>
|
||||
|
||||
<abstract>
|
||||
<para>
|
||||
|
@ -30,7 +30,7 @@
|
|||
|
||||
<legalnotice>
|
||||
<para>
|
||||
Copyright (c) 2002-2004 Takashi Iwai <email>tiwai@suse.de</email>
|
||||
Copyright (c) 2002-2005 Takashi Iwai <email>tiwai@suse.de</email>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
@ -1433,25 +1433,10 @@
|
|||
<informalexample>
|
||||
<programlisting>
|
||||
<![CDATA[
|
||||
if (chip->res_port) {
|
||||
release_resource(chip->res_port);
|
||||
kfree_nocheck(chip->res_port);
|
||||
}
|
||||
release_and_free_resource(chip->res_port);
|
||||
]]>
|
||||
</programlisting>
|
||||
</informalexample>
|
||||
|
||||
As you can see, the resource pointer is also to be freed
|
||||
via <function>kfree_nocheck()</function> after
|
||||
<function>release_resource()</function> is called. You
|
||||
cannot use <function>kfree()</function> here, because on ALSA,
|
||||
<function>kfree()</function> may be a wrapper to its own
|
||||
allocator with the memory debugging. Since the resource pointer
|
||||
is allocated externally outside the ALSA, it must be released
|
||||
via the native
|
||||
<function>kfree()</function>.
|
||||
<function>kfree_nocheck()</function> is used for that; it calls
|
||||
the native <function>kfree()</function> without wrapper.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
@ -2190,8 +2175,7 @@ struct _snd_pcm_runtime {
|
|||
unsigned int rate_den;
|
||||
|
||||
/* -- SW params -- */
|
||||
int tstamp_timespec; /* use timeval (0) or timespec (1) */
|
||||
snd_pcm_tstamp_t tstamp_mode; /* mmap timestamp is updated */
|
||||
struct timespec tstamp_mode; /* mmap timestamp is updated */
|
||||
unsigned int period_step;
|
||||
unsigned int sleep_min; /* min ticks to sleep */
|
||||
snd_pcm_uframes_t xfer_align; /* xfer size need to be a multiple */
|
||||
|
@ -3709,8 +3693,7 @@ struct _snd_pcm_runtime {
|
|||
<para>
|
||||
Here, the chip instance is retrieved via
|
||||
<function>snd_kcontrol_chip()</function> macro. This macro
|
||||
converts from kcontrol->private_data to the type defined by
|
||||
<type>chip_t</type>. The
|
||||
just accesses to kcontrol->private_data. The
|
||||
kcontrol->private_data field is
|
||||
given as the argument of <function>snd_ctl_new()</function>
|
||||
(see the later subsection
|
||||
|
@ -5998,32 +5981,23 @@ struct _snd_pcm_runtime {
|
|||
The first argument is the expression to evaluate, and the
|
||||
second argument is the action if it fails. When
|
||||
<constant>CONFIG_SND_DEBUG</constant>, is set, it will show an
|
||||
error message such as <computeroutput>BUG? (xxx) (called from
|
||||
yyy)</computeroutput>. When no debug flag is set, this is
|
||||
ignored.
|
||||
error message such as <computeroutput>BUG? (xxx)</computeroutput>
|
||||
together with stack trace.
|
||||
</para>
|
||||
</section>
|
||||
|
||||
<section id="useful-functions-snd-runtime-check">
|
||||
<title><function>snd_runtime_check()</function></title>
|
||||
<para>
|
||||
This macro is quite similar with
|
||||
<function>snd_assert()</function>. Unlike
|
||||
<function>snd_assert()</function>, the expression is always
|
||||
evaluated regardless of
|
||||
<constant>CONFIG_SND_DEBUG</constant>. When
|
||||
<constant>CONFIG_SND_DEBUG</constant> is set, the macro will
|
||||
show a message like <computeroutput>ERROR (xx) (called from
|
||||
yyy)</computeroutput>.
|
||||
When no debug flag is set, this macro is ignored.
|
||||
</para>
|
||||
</section>
|
||||
|
||||
<section id="useful-functions-snd-bug">
|
||||
<title><function>snd_BUG()</function></title>
|
||||
<para>
|
||||
It calls <function>snd_assert(0,)</function> -- that is, just
|
||||
prints the error message at the point. It's useful to show that
|
||||
a fatal error happens there.
|
||||
It shows <computeroutput>BUG?</computeroutput> message and
|
||||
stack trace as well as <function>snd_assert</function> at the point.
|
||||
It's useful to show that a fatal error happens there.
|
||||
</para>
|
||||
<para>
|
||||
When no debug flag is set, this macro is ignored.
|
||||
</para>
|
||||
</section>
|
||||
</chapter>
|
||||
|
|
|
@ -41,9 +41,9 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
|
|||
vs cpu-endian vs whatever), and there the constant "0" really _is_
|
||||
special.
|
||||
|
||||
Modify top-level Makefile to say
|
||||
Use
|
||||
|
||||
CHECK = sparse -Wbitwise
|
||||
make C=[12] CF=-Wbitwise
|
||||
|
||||
or you don't get any checking at all.
|
||||
|
||||
|
|
|
@ -27,9 +27,9 @@ information out of a register+stack dump printed by the kernel on
|
|||
protection faults (so-called "kernel oops").
|
||||
|
||||
If you run into some kind of deadlock, you can try to dump a call trace
|
||||
for each process using sysrq-t (see Documentation/sysrq.txt). ksymoops
|
||||
will translate these dumps into kernel symbols too. This way it is
|
||||
possible to figure where *exactly* some process in "D" state is stuck.
|
||||
for each process using sysrq-t (see Documentation/sysrq.txt).
|
||||
This way it is possible to figure where *exactly* some process in "D"
|
||||
state is stuck.
|
||||
|
||||
I've seen reports that bttv 0.7.x crashes whereas 0.8.x works rock solid
|
||||
for some people. Thus probably a small buglet left somewhere in bttv
|
||||
|
|
|
@ -13,12 +13,13 @@ This optimization is more critical now as bigger and bigger physical memories
|
|||
Users can use the huge page support in Linux kernel by either using the mmap
|
||||
system call or standard SYSv shared memory system calls (shmget, shmat).
|
||||
|
||||
First the Linux kernel needs to be built with CONFIG_HUGETLB_PAGE (present
|
||||
under Processor types and feature) and CONFIG_HUGETLBFS (present under file
|
||||
system option on config menu) config options.
|
||||
First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
|
||||
(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
|
||||
automatically when CONFIG_HUGETLBFS is selected) configuration
|
||||
options.
|
||||
|
||||
The kernel built with hugepage support should show the number of configured
|
||||
hugepages in the system by running the "cat /proc/meminfo" command.
|
||||
hugepages in the system by running the "cat /proc/meminfo" command.
|
||||
|
||||
/proc/meminfo also provides information about the total number of hugetlb
|
||||
pages configured in the kernel. It also displays information about the
|
||||
|
@ -38,19 +39,19 @@ in the kernel.
|
|||
|
||||
/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
|
||||
pages in the kernel. Super user can dynamically request more (or free some
|
||||
pre-configured) hugepages.
|
||||
The allocation( or deallocation) of hugetlb pages is posible only if there are
|
||||
pre-configured) hugepages.
|
||||
The allocation (or deallocation) of hugetlb pages is possible only if there are
|
||||
enough physically contiguous free pages in system (freeing of hugepages is
|
||||
possible only if there are enough hugetlb pages free that can be transfered
|
||||
possible only if there are enough hugetlb pages free that can be transfered
|
||||
back to regular memory pool).
|
||||
|
||||
Pages that are used as hugetlb pages are reserved inside the kernel and can
|
||||
not be used for other purposes.
|
||||
not be used for other purposes.
|
||||
|
||||
Once the kernel with Hugetlb page support is built and running, a user can
|
||||
use either the mmap system call or shared memory system calls to start using
|
||||
the huge pages. It is required that the system administrator preallocate
|
||||
enough memory for huge page purposes.
|
||||
enough memory for huge page purposes.
|
||||
|
||||
Use the following command to dynamically allocate/deallocate hugepages:
|
||||
|
||||
|
@ -80,9 +81,9 @@ memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
|
|||
rounded down to HPAGE_SIZE. The option nr_inode sets the maximum number of
|
||||
inodes that /mnt/huge can use. If the size or nr_inode options are not
|
||||
provided on command line then no limits are set. For size and nr_inodes
|
||||
options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
|
||||
example, size=2K has the same meaning as size=2048. An example is given at
|
||||
the end of this document.
|
||||
options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
|
||||
example, size=2K has the same meaning as size=2048. An example is given at
|
||||
the end of this document.
|
||||
|
||||
read and write system calls are not supported on files that reside on hugetlb
|
||||
file systems.
|
||||
|
|
32
MAINTAINERS
32
MAINTAINERS
|
@ -1077,6 +1077,26 @@ P: Jaroslav Kysela
|
|||
M: perex@suse.cz
|
||||
S: Maintained
|
||||
|
||||
HPET: High Precision Event Timers driver (hpet.c)
|
||||
P: Clemens Ladisch
|
||||
M: clemens@ladisch.de
|
||||
S: Maintained
|
||||
|
||||
HPET: i386
|
||||
P: Venkatesh Pallipadi (Venki)
|
||||
M: venkatesh.pallipadi@intel.com
|
||||
S: Maintained
|
||||
|
||||
HPET: x86_64
|
||||
P: Andi Kleen and Vojtech Pavlik
|
||||
M: ak@muc.de and vojtech@suse.cz
|
||||
S: Maintained
|
||||
|
||||
HPET: ACPI hpet.c
|
||||
P: Bob Picco
|
||||
M: bob.picco@hp.com
|
||||
S: Maintained
|
||||
|
||||
HPFS FILESYSTEM
|
||||
P: Mikulas Patocka
|
||||
M: mikulas@artax.karlin.mff.cuni.cz
|
||||
|
@ -2051,6 +2071,12 @@ P: Matt Mackall
|
|||
M: mpm@selenic.com
|
||||
S: Maintained
|
||||
|
||||
RAPIDIO SUBSYSTEM
|
||||
P: Matt Porter
|
||||
M: mporter@kernel.crashing.org
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Maintained
|
||||
|
||||
REAL TIME CLOCK DRIVER
|
||||
P: Paul Gortmaker
|
||||
M: p_gortmaker@yahoo.com
|
||||
|
@ -2455,10 +2481,10 @@ L: linux-kernel@vger.kernel.org
|
|||
S: Maintained
|
||||
|
||||
TRIVIAL PATCHES
|
||||
P: Rusty Russell
|
||||
M: trivial@rustcorp.com.au
|
||||
P: Adrian Bunk
|
||||
M: trivial@kernel.org
|
||||
L: linux-kernel@vger.kernel.org
|
||||
W: http://www.kernel.org/pub/linux/kernel/people/rusty/trivial/
|
||||
W: http://www.kernel.org/pub/linux/kernel/people/bunk/trivial/
|
||||
S: Maintained
|
||||
|
||||
TMS380 TOKEN-RING NETWORK DRIVER
|
||||
|
|
2
Makefile
2
Makefile
|
@ -583,7 +583,7 @@ export MODLIB
|
|||
|
||||
|
||||
ifeq ($(KBUILD_EXTMOD),)
|
||||
core-y += kernel/ mm/ fs/ ipc/ security/ crypto/
|
||||
core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
|
||||
|
||||
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
|
||||
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \
|
||||
|
|
|
@ -324,7 +324,7 @@ menu "Kernel Features"
|
|||
|
||||
config SMP
|
||||
bool "Symmetric Multi-Processing (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL && BROKEN #&& n
|
||||
depends on EXPERIMENTAL && REALVIEW_MPCORE
|
||||
help
|
||||
This enables support for systems with more than one CPU. If you have
|
||||
a system with only one CPU, like most personal computers, say N. If
|
||||
|
@ -585,7 +585,7 @@ config FPE_NWFPE
|
|||
|
||||
config FPE_NWFPE_XP
|
||||
bool "Support extended precision"
|
||||
depends on FPE_NWFPE && !CPU_BIG_ENDIAN
|
||||
depends on FPE_NWFPE
|
||||
help
|
||||
Say Y to include 80-bit support in the kernel floating-point
|
||||
emulator. Otherwise, only 32 and 64-bit support is compiled in.
|
||||
|
|
|
@ -648,7 +648,7 @@ static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
|
|||
|
||||
#endif
|
||||
|
||||
static int do_ptrace(int request, struct task_struct *child, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
unsigned long tmp;
|
||||
int ret;
|
||||
|
@ -782,53 +782,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret == 0)
|
||||
ret = do_ptrace(request, child, addr, data);
|
||||
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ip;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <linux/list.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <asm/semaphore.h>
|
||||
#include <asm/hardware/clock.h>
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <asm/hardware.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/sizes.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#include <asm/mach/map.h>
|
||||
|
||||
|
|
|
@ -420,8 +420,7 @@ static int impd1_probe(struct lm_device *dev)
|
|||
free_impd1:
|
||||
if (impd1 && impd1->base)
|
||||
iounmap(impd1->base);
|
||||
if (impd1)
|
||||
kfree(impd1);
|
||||
kfree(impd1);
|
||||
release_lm:
|
||||
release_mem_region(dev->resource.start, SZ_4K);
|
||||
return ret;
|
||||
|
|
|
@ -84,63 +84,54 @@ static struct map_desc ixp2000_io_desc[] __initdata = {
|
|||
.virtual = IXP2000_CAP_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_CAP_PHYS_BASE),
|
||||
.length = IXP2000_CAP_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_INTCTL_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_INTCTL_PHYS_BASE),
|
||||
.length = IXP2000_INTCTL_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_PCI_CREG_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_PCI_CREG_PHYS_BASE),
|
||||
.length = IXP2000_PCI_CREG_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_PCI_CSR_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_PCI_CSR_PHYS_BASE),
|
||||
.length = IXP2000_PCI_CSR_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_MSF_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_MSF_PHYS_BASE),
|
||||
.length = IXP2000_MSF_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_PCI_IO_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_PCI_IO_PHYS_BASE),
|
||||
.length = IXP2000_PCI_IO_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_PCI_CFG0_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_PCI_CFG0_PHYS_BASE),
|
||||
.length = IXP2000_PCI_CFG0_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}, {
|
||||
.virtual = IXP2000_PCI_CFG1_VIRT_BASE,
|
||||
.pfn = __phys_to_pfn(IXP2000_PCI_CFG1_PHYS_BASE),
|
||||
.length = IXP2000_PCI_CFG1_SIZE,
|
||||
.type = MT_DEVICE
|
||||
.type = MT_IXP2000_DEVICE,
|
||||
}
|
||||
};
|
||||
|
||||
void __init ixp2000_map_io(void)
|
||||
{
|
||||
extern unsigned int processor_id;
|
||||
|
||||
/*
|
||||
* On IXP2400 CPUs we need to use MT_IXP2000_DEVICE for
|
||||
* tweaking the PMDs so XCB=101. On IXP2800s we use the normal
|
||||
* PMD flags.
|
||||
* On IXP2400 CPUs we need to use MT_IXP2000_DEVICE so that
|
||||
* XCB=101 (to avoid triggering erratum #66), and given that
|
||||
* this mode speeds up I/O accesses and we have write buffer
|
||||
* flushes in the right places anyway, it doesn't hurt to use
|
||||
* XCB=101 for all IXP2000s.
|
||||
*/
|
||||
if ((processor_id & 0xfffffff0) == 0x69054190) {
|
||||
int i;
|
||||
|
||||
printk(KERN_INFO "Enabling IXP2400 erratum #66 workaround\n");
|
||||
|
||||
for(i=0;i<ARRAY_SIZE(ixp2000_io_desc);i++)
|
||||
ixp2000_io_desc[i].type = MT_IXP2000_DEVICE;
|
||||
}
|
||||
|
||||
iotable_init(ixp2000_io_desc, ARRAY_SIZE(ixp2000_io_desc));
|
||||
|
||||
/* Set slowport to 8-bit mode. */
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <asm/arch/akita.h>
|
||||
#include <asm/arch/corgi.h>
|
||||
#include <asm/arch/hardware.h>
|
||||
|
|
|
@ -8,4 +8,13 @@ config MACH_REALVIEW_EB
|
|||
help
|
||||
Include support for the ARM(R) RealView Emulation Baseboard platform.
|
||||
|
||||
config REALVIEW_MPCORE
|
||||
bool "Support MPcore tile"
|
||||
depends on MACH_REALVIEW_EB
|
||||
help
|
||||
Enable support for the MPCore tile on the Realview platform.
|
||||
Since there are device address and interrupt differences, a
|
||||
kernel built with this option enabled is not compatible with
|
||||
other tiles.
|
||||
|
||||
endmenu
|
||||
|
|
|
@ -4,3 +4,4 @@
|
|||
|
||||
obj-y := core.o clock.o
|
||||
obj-$(CONFIG_MACH_REALVIEW_EB) += realview_eb.o
|
||||
obj-$(CONFIG_SMP) += platsmp.o headsmp.o
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#define __ASM_ARCH_REALVIEW_H
|
||||
|
||||
#include <asm/hardware/amba.h>
|
||||
#include <asm/leds.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
#define __io_address(n) __io(IO_ADDRESS(n))
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* linux/arch/arm/mach-realview/headsmp.S
|
||||
*
|
||||
* Copyright (c) 2003 ARM Limited
|
||||
* All Rights Reserved
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
__INIT
|
||||
|
||||
/*
|
||||
* Realview specific entry point for secondary CPUs. This provides
|
||||
* a "holding pen" into which all secondary cores are held until we're
|
||||
* ready for them to initialise.
|
||||
*/
|
||||
ENTRY(realview_secondary_startup)
|
||||
mrc p15, 0, r0, c0, c0, 5
|
||||
and r0, r0, #15
|
||||
adr r4, 1f
|
||||
ldmia r4, {r5, r6}
|
||||
sub r4, r4, r5
|
||||
add r6, r6, r4
|
||||
pen: ldr r7, [r6]
|
||||
cmp r7, r0
|
||||
bne pen
|
||||
|
||||
/*
|
||||
* we've been released from the holding pen: secondary_stack
|
||||
* should now contain the SVC stack for this core
|
||||
*/
|
||||
b secondary_startup
|
||||
|
||||
1: .long .
|
||||
.long pen_release
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* linux/arch/arm/mach-realview/platsmp.c
|
||||
*
|
||||
* Copyright (C) 2002 ARM Ltd.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/hardware/arm_scu.h>
|
||||
#include <asm/hardware.h>
|
||||
|
||||
#include "core.h"
|
||||
|
||||
extern void realview_secondary_startup(void);
|
||||
|
||||
/*
|
||||
* control for which core is the next to come out of the secondary
|
||||
* boot "holding pen"
|
||||
*/
|
||||
volatile int __cpuinitdata pen_release = -1;
|
||||
|
||||
static unsigned int __init get_core_count(void)
|
||||
{
|
||||
unsigned int ncores;
|
||||
|
||||
ncores = __raw_readl(IO_ADDRESS(REALVIEW_MPCORE_SCU_BASE) + SCU_CONFIG);
|
||||
|
||||
return (ncores & 0x03) + 1;
|
||||
}
|
||||
|
||||
static DEFINE_SPINLOCK(boot_lock);
|
||||
|
||||
void __cpuinit platform_secondary_init(unsigned int cpu)
|
||||
{
|
||||
/*
|
||||
* the primary core may have used a "cross call" soft interrupt
|
||||
* to get this processor out of WFI in the BootMonitor - make
|
||||
* sure that we are no longer being sent this soft interrupt
|
||||
*/
|
||||
smp_cross_call_done(cpumask_of_cpu(cpu));
|
||||
|
||||
/*
|
||||
* if any interrupts are already enabled for the primary
|
||||
* core (e.g. timer irq), then they will not have been enabled
|
||||
* for us: do so
|
||||
*/
|
||||
gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
|
||||
|
||||
/*
|
||||
* let the primary processor know we're out of the
|
||||
* pen, then head off into the C entry point
|
||||
*/
|
||||
pen_release = -1;
|
||||
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
spin_lock(&boot_lock);
|
||||
spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
{
|
||||
unsigned long timeout;
|
||||
|
||||
/*
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
* the holding pen - release it, then wait for it to flag
|
||||
* that it has been released by resetting pen_release.
|
||||
*
|
||||
* Note that "pen_release" is the hardware CPU ID, whereas
|
||||
* "cpu" is Linux's internal ID.
|
||||
*/
|
||||
pen_release = cpu;
|
||||
flush_cache_all();
|
||||
|
||||
/*
|
||||
* XXX
|
||||
*
|
||||
* This is a later addition to the booting protocol: the
|
||||
* bootMonitor now puts secondary cores into WFI, so
|
||||
* poke_milo() no longer gets the cores moving; we need
|
||||
* to send a soft interrupt to wake the secondary core.
|
||||
* Use smp_cross_call() for this, since there's little
|
||||
* point duplicating the code here
|
||||
*/
|
||||
smp_cross_call(cpumask_of_cpu(cpu));
|
||||
|
||||
timeout = jiffies + (1 * HZ);
|
||||
while (time_before(jiffies, timeout)) {
|
||||
if (pen_release == -1)
|
||||
break;
|
||||
|
||||
udelay(10);
|
||||
}
|
||||
|
||||
/*
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
|
||||
static void __init poke_milo(void)
|
||||
{
|
||||
extern void secondary_startup(void);
|
||||
|
||||
/* nobody is to be released from the pen yet */
|
||||
pen_release = -1;
|
||||
|
||||
/*
|
||||
* write the address of secondary startup into the system-wide
|
||||
* flags register, then clear the bottom two bits, which is what
|
||||
* BootMonitor is waiting for
|
||||
*/
|
||||
#if 1
|
||||
#define REALVIEW_SYS_FLAGSS_OFFSET 0x30
|
||||
__raw_writel(virt_to_phys(realview_secondary_startup),
|
||||
(IO_ADDRESS(REALVIEW_SYS_BASE) +
|
||||
REALVIEW_SYS_FLAGSS_OFFSET));
|
||||
#define REALVIEW_SYS_FLAGSC_OFFSET 0x34
|
||||
__raw_writel(3,
|
||||
(IO_ADDRESS(REALVIEW_SYS_BASE) +
|
||||
REALVIEW_SYS_FLAGSC_OFFSET));
|
||||
#endif
|
||||
|
||||
mb();
|
||||
}
|
||||
|
||||
void __init smp_prepare_cpus(unsigned int max_cpus)
|
||||
{
|
||||
unsigned int ncores = get_core_count();
|
||||
unsigned int cpu = smp_processor_id();
|
||||
int i;
|
||||
|
||||
/* sanity check */
|
||||
if (ncores == 0) {
|
||||
printk(KERN_ERR
|
||||
"Realview: strange CM count of 0? Default to 1\n");
|
||||
|
||||
ncores = 1;
|
||||
}
|
||||
|
||||
if (ncores > NR_CPUS) {
|
||||
printk(KERN_WARNING
|
||||
"Realview: no. of cores (%d) greater than configured "
|
||||
"maximum of %d - clipping\n",
|
||||
ncores, NR_CPUS);
|
||||
ncores = NR_CPUS;
|
||||
}
|
||||
|
||||
smp_store_cpu_info(cpu);
|
||||
|
||||
/*
|
||||
* are we trying to boot more cores than exist?
|
||||
*/
|
||||
if (max_cpus > ncores)
|
||||
max_cpus = ncores;
|
||||
|
||||
/*
|
||||
* Initialise the possible/present maps.
|
||||
* cpu_possible_map describes the set of CPUs which may be present
|
||||
* cpu_present_map describes the set of CPUs populated
|
||||
*/
|
||||
for (i = 0; i < max_cpus; i++) {
|
||||
cpu_set(i, cpu_possible_map);
|
||||
cpu_set(i, cpu_present_map);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do we need any more CPUs? If so, then let them know where
|
||||
* to start. Note that, on modern versions of MILO, the "poke"
|
||||
* doesn't actually do anything until each individual core is
|
||||
* sent a soft interrupt to get it out of WFI
|
||||
*/
|
||||
if (max_cpus > 1)
|
||||
poke_milo();
|
||||
}
|
|
@ -136,6 +136,11 @@ static struct amba_device *amba_devs[] __initdata = {
|
|||
|
||||
static void __init gic_init_irq(void)
|
||||
{
|
||||
#ifdef CONFIG_REALVIEW_MPCORE
|
||||
writel(0x0000a05f, __io_address(REALVIEW_SYS_LOCK));
|
||||
writel(0x008003c0, __io_address(REALVIEW_SYS_BASE) + 0xd8);
|
||||
writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
|
||||
#endif
|
||||
gic_dist_init(__io_address(REALVIEW_GIC_DIST_BASE));
|
||||
gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
|
||||
}
|
||||
|
|
|
@ -354,7 +354,7 @@ void __init build_mem_type_table(void)
|
|||
{
|
||||
struct cachepolicy *cp;
|
||||
unsigned int cr = get_cr();
|
||||
unsigned int user_pgprot;
|
||||
unsigned int user_pgprot, kern_pgprot;
|
||||
int cpu_arch = cpu_architecture();
|
||||
int i;
|
||||
|
||||
|
@ -381,7 +381,7 @@ void __init build_mem_type_table(void)
|
|||
}
|
||||
|
||||
cp = &cache_policies[cachepolicy];
|
||||
user_pgprot = cp->pte;
|
||||
kern_pgprot = user_pgprot = cp->pte;
|
||||
|
||||
/*
|
||||
* ARMv6 and above have extended page tables.
|
||||
|
@ -393,6 +393,7 @@ void __init build_mem_type_table(void)
|
|||
*/
|
||||
mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4;
|
||||
mem_types[MT_ROM].prot_sect &= ~PMD_BIT4;
|
||||
|
||||
/*
|
||||
* Mark cache clean areas and XIP ROM read only
|
||||
* from SVC mode and no access from userspace.
|
||||
|
@ -412,32 +413,47 @@ void __init build_mem_type_table(void)
|
|||
* (iow, non-global)
|
||||
*/
|
||||
user_pgprot |= L_PTE_ASID;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Mark memory with the "shared" attribute for SMP systems
|
||||
*/
|
||||
user_pgprot |= L_PTE_SHARED;
|
||||
kern_pgprot |= L_PTE_SHARED;
|
||||
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
|
||||
#endif
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
unsigned long v = pgprot_val(protection_map[i]);
|
||||
v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
|
||||
protection_map[i] = __pgprot(v);
|
||||
}
|
||||
|
||||
mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
|
||||
mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
|
||||
|
||||
if (cpu_arch >= CPU_ARCH_ARMv5) {
|
||||
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
|
||||
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
|
||||
#ifndef CONFIG_SMP
|
||||
/*
|
||||
* Only use write-through for non-SMP systems
|
||||
*/
|
||||
mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
|
||||
mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
|
||||
#endif
|
||||
} else {
|
||||
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte;
|
||||
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte;
|
||||
mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
|
||||
}
|
||||
|
||||
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
|
||||
L_PTE_DIRTY | L_PTE_WRITE |
|
||||
L_PTE_EXEC | kern_pgprot);
|
||||
|
||||
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
|
||||
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
|
||||
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
|
||||
mem_types[MT_ROM].prot_sect |= cp->pmd;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
unsigned long v = pgprot_val(protection_map[i]);
|
||||
v = (v & ~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot;
|
||||
protection_map[i] = __pgprot(v);
|
||||
}
|
||||
|
||||
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
|
||||
L_PTE_DIRTY | L_PTE_WRITE |
|
||||
L_PTE_EXEC | cp->pte);
|
||||
|
||||
switch (cp->pmd) {
|
||||
case PMD_SECT_WT:
|
||||
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/hardware/arm_scu.h>
|
||||
#include <asm/procinfo.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
|
@ -112,6 +113,9 @@ ENTRY(cpu_v6_dcache_clean_area)
|
|||
ENTRY(cpu_v6_switch_mm)
|
||||
mov r2, #0
|
||||
ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id
|
||||
#ifdef CONFIG_SMP
|
||||
orr r0, r0, #2 @ set shared pgtable
|
||||
#endif
|
||||
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
|
||||
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
|
||||
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
|
||||
|
@ -140,7 +144,7 @@ ENTRY(cpu_v6_switch_mm)
|
|||
ENTRY(cpu_v6_set_pte)
|
||||
str r1, [r0], #-2048 @ linux version
|
||||
|
||||
bic r2, r1, #0x000007f0
|
||||
bic r2, r1, #0x000003f0
|
||||
bic r2, r2, #0x00000003
|
||||
orr r2, r2, #PTE_EXT_AP0 | 2
|
||||
|
||||
|
@ -191,6 +195,23 @@ cpu_v6_name:
|
|||
* - cache type register is implemented
|
||||
*/
|
||||
__v6_setup:
|
||||
#ifdef CONFIG_SMP
|
||||
/* Set up the SCU on core 0 only */
|
||||
mrc p15, 0, r0, c0, c0, 5 @ CPU core number
|
||||
ands r0, r0, #15
|
||||
moveq r0, #0x10000000 @ SCU_BASE
|
||||
orreq r0, r0, #0x00100000
|
||||
ldreq r5, [r0, #SCU_CTRL]
|
||||
orreq r5, r5, #1
|
||||
streq r5, [r0, #SCU_CTRL]
|
||||
|
||||
#ifndef CONFIG_CPU_DCACHE_DISABLE
|
||||
mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode
|
||||
orr r0, r0, #0x20
|
||||
mcr p15, 0, r0, c1, c0, 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
mov r0, #0
|
||||
mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache
|
||||
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
|
||||
|
@ -198,6 +219,9 @@ __v6_setup:
|
|||
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
|
||||
mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs
|
||||
mcr p15, 0, r0, c2, c0, 2 @ TTB control register
|
||||
#ifdef CONFIG_SMP
|
||||
orr r4, r4, #2 @ set shared pgtable
|
||||
#endif
|
||||
mcr p15, 0, r4, c2, c0, 1 @ load TTB1
|
||||
#ifdef CONFIG_VFP
|
||||
mrc p15, 0, r0, c1, c0, 2
|
||||
|
|
|
@ -60,7 +60,7 @@ typedef union tagFPREG {
|
|||
#ifdef CONFIG_FPE_NWFPE_XP
|
||||
floatx80 fExtended;
|
||||
#else
|
||||
int padding[3];
|
||||
u32 padding[3];
|
||||
#endif
|
||||
} FPREG;
|
||||
|
||||
|
|
|
@ -59,8 +59,13 @@ static inline void loadExtended(const unsigned int Fn, const unsigned int __user
|
|||
p = (unsigned int *) &fpa11->fpreg[Fn].fExtended;
|
||||
fpa11->fType[Fn] = typeExtended;
|
||||
get_user(p[0], &pMem[0]); /* sign & exponent */
|
||||
#ifdef __ARMEB__
|
||||
get_user(p[1], &pMem[1]); /* ms bits */
|
||||
get_user(p[2], &pMem[2]); /* ls bits */
|
||||
#else
|
||||
get_user(p[1], &pMem[2]); /* ls bits */
|
||||
get_user(p[2], &pMem[1]); /* ms bits */
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -177,8 +182,13 @@ static inline void storeExtended(const unsigned int Fn, unsigned int __user *pMe
|
|||
}
|
||||
|
||||
put_user(val.i[0], &pMem[0]); /* sign & exp */
|
||||
#ifdef __ARMEB__
|
||||
put_user(val.i[1], &pMem[1]); /* msw */
|
||||
put_user(val.i[2], &pMem[2]);
|
||||
#else
|
||||
put_user(val.i[1], &pMem[2]);
|
||||
put_user(val.i[2], &pMem[1]); /* msw */
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -29,14 +29,14 @@
|
|||
|
||||
#ifdef CONFIG_FPE_NWFPE_XP
|
||||
const floatx80 floatx80Constant[] = {
|
||||
{0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
|
||||
{0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
|
||||
{0x4000, 0x8000000000000000ULL}, /* extended 2.0 */
|
||||
{0x4000, 0xc000000000000000ULL}, /* extended 3.0 */
|
||||
{0x4001, 0x8000000000000000ULL}, /* extended 4.0 */
|
||||
{0x4001, 0xa000000000000000ULL}, /* extended 5.0 */
|
||||
{0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
|
||||
{0x4002, 0xa000000000000000ULL} /* extended 10.0 */
|
||||
{ .high = 0x0000, .low = 0x0000000000000000ULL},/* extended 0.0 */
|
||||
{ .high = 0x3fff, .low = 0x8000000000000000ULL},/* extended 1.0 */
|
||||
{ .high = 0x4000, .low = 0x8000000000000000ULL},/* extended 2.0 */
|
||||
{ .high = 0x4000, .low = 0xc000000000000000ULL},/* extended 3.0 */
|
||||
{ .high = 0x4001, .low = 0x8000000000000000ULL},/* extended 4.0 */
|
||||
{ .high = 0x4001, .low = 0xa000000000000000ULL},/* extended 5.0 */
|
||||
{ .high = 0x3ffe, .low = 0x8000000000000000ULL},/* extended 0.5 */
|
||||
{ .high = 0x4002, .low = 0xa000000000000000ULL},/* extended 10.0 */
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -332,6 +332,7 @@ static floatx80 commonNaNToFloatx80( commonNaNT a )
|
|||
|
||||
z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
|
||||
z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
|
||||
}
|
||||
|
|
|
@ -531,6 +531,7 @@ INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
|
|||
|
||||
z.low = zSig;
|
||||
z.high = ( ( (bits16) zSign )<<15 ) + zExp;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
|
||||
}
|
||||
|
@ -2831,6 +2832,7 @@ static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, flo
|
|||
roundData->exception |= float_flag_invalid;
|
||||
z.low = floatx80_default_nan_low;
|
||||
z.high = floatx80_default_nan_high;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
}
|
||||
if ( aExp == 0 ) {
|
||||
|
@ -2950,6 +2952,7 @@ floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
|
|||
roundData->exception |= float_flag_invalid;
|
||||
z.low = floatx80_default_nan_low;
|
||||
z.high = floatx80_default_nan_high;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
}
|
||||
return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
|
||||
|
@ -3015,6 +3018,7 @@ floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
|
|||
roundData->exception |= float_flag_invalid;
|
||||
z.low = floatx80_default_nan_low;
|
||||
z.high = floatx80_default_nan_high;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
}
|
||||
roundData->exception |= float_flag_divbyzero;
|
||||
|
@ -3093,6 +3097,7 @@ floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
|
|||
roundData->exception |= float_flag_invalid;
|
||||
z.low = floatx80_default_nan_low;
|
||||
z.high = floatx80_default_nan_high;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
}
|
||||
normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
|
||||
|
@ -3184,6 +3189,7 @@ floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
|
|||
roundData->exception |= float_flag_invalid;
|
||||
z.low = floatx80_default_nan_low;
|
||||
z.high = floatx80_default_nan_high;
|
||||
z.__padding = 0;
|
||||
return z;
|
||||
}
|
||||
if ( aExp == 0 ) {
|
||||
|
|
|
@ -51,11 +51,17 @@ input or output the `floatx80' type will be defined.
|
|||
Software IEC/IEEE floating-point types.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
typedef unsigned long int float32;
|
||||
typedef unsigned long long float64;
|
||||
typedef u32 float32;
|
||||
typedef u64 float64;
|
||||
typedef struct {
|
||||
unsigned short high;
|
||||
unsigned long long low;
|
||||
#ifdef __ARMEB__
|
||||
u16 __padding;
|
||||
u16 high;
|
||||
#else
|
||||
u16 high;
|
||||
u16 __padding;
|
||||
#endif
|
||||
u64 low;
|
||||
} floatx80;
|
||||
|
||||
/*
|
||||
|
|
|
@ -546,7 +546,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
|
|||
sizeof(struct user_fp)) ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
static int do_ptrace(int request, struct task_struct *child, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
unsigned long tmp;
|
||||
int ret;
|
||||
|
@ -665,53 +665,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
|
|||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret == 0)
|
||||
ret = do_ptrace(request, child, addr, data);
|
||||
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ip;
|
||||
|
|
|
@ -177,7 +177,7 @@ The example address is 0xd004000c; in binary this is:
|
|||
Given the top-level Page Directory, the offset in that directory is calculated
|
||||
using the upper 8 bits:
|
||||
|
||||
extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
|
||||
static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
|
||||
{
|
||||
return mm->pgd + (address >> PGDIR_SHIFT);
|
||||
}
|
||||
|
@ -190,14 +190,14 @@ The pgd_t from our example will therefore be the 208'th (0xd0) entry in mm->pgd.
|
|||
|
||||
Since the Middle Directory does not exist, it is a unity mapping:
|
||||
|
||||
extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
|
||||
static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
|
||||
{
|
||||
return (pmd_t *) dir;
|
||||
}
|
||||
|
||||
The Page Table provides the final lookup by using bits 13 to 23 as index:
|
||||
|
||||
extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
|
||||
static inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
|
||||
{
|
||||
return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) &
|
||||
(PTRS_PER_PTE - 1));
|
||||
|
|
|
@ -76,55 +76,11 @@ ptrace_disable(struct task_struct *child)
|
|||
* (in user space) where the result of the ptrace call is written (instead of
|
||||
* being returned).
|
||||
*/
|
||||
asmlinkage int
|
||||
sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
unsigned long __user *datap = (unsigned long __user *)data;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
|
||||
if (pid == 1) /* Leave the init process alone! */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* Read word at location address. */
|
||||
case PTRACE_PEEKTEXT:
|
||||
|
@ -289,10 +245,7 @@ sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = ptrace_request(child, request, addr, data);
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -476,7 +476,7 @@ give_sigsegv:
|
|||
* OK, we're invoking a handler
|
||||
*/
|
||||
|
||||
extern inline void
|
||||
static inline void
|
||||
handle_signal(int canrestart, unsigned long sig,
|
||||
siginfo_t *info, struct k_sigaction *ka,
|
||||
sigset_t *oldset, struct pt_regs * regs)
|
||||
|
|
|
@ -277,7 +277,7 @@ struct file_operations cryptocop_fops = {
|
|||
static void free_cdesc(struct cryptocop_dma_desc *cdesc)
|
||||
{
|
||||
DEBUG(printk("free_cdesc: cdesc 0x%p, from_pool=%d\n", cdesc, cdesc->from_pool));
|
||||
if (cdesc->free_buf) kfree(cdesc->free_buf);
|
||||
kfree(cdesc->free_buf);
|
||||
|
||||
if (cdesc->from_pool) {
|
||||
unsigned long int flags;
|
||||
|
@ -2950,15 +2950,15 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
|
|||
put_page(outpages[i]);
|
||||
}
|
||||
|
||||
if (digest_result) kfree(digest_result);
|
||||
if (inpages) kfree(inpages);
|
||||
if (outpages) kfree(outpages);
|
||||
kfree(digest_result);
|
||||
kfree(inpages);
|
||||
kfree(outpages);
|
||||
if (cop){
|
||||
if (cop->tfrm_op.indata) kfree(cop->tfrm_op.indata);
|
||||
if (cop->tfrm_op.outdata) kfree(cop->tfrm_op.outdata);
|
||||
kfree(cop->tfrm_op.indata);
|
||||
kfree(cop->tfrm_op.outdata);
|
||||
kfree(cop);
|
||||
}
|
||||
if (jc) kfree(jc);
|
||||
kfree(jc);
|
||||
|
||||
DEBUG(print_lock_status());
|
||||
|
||||
|
|
|
@ -99,55 +99,11 @@ ptrace_disable(struct task_struct *child)
|
|||
}
|
||||
|
||||
|
||||
asmlinkage int
|
||||
sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
unsigned long __user *datap = (unsigned long __user *)data;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
|
||||
if (pid == 1) /* Leave the init process alone! */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* Read word at location address. */
|
||||
case PTRACE_PEEKTEXT:
|
||||
|
@ -347,10 +303,7 @@ sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = ptrace_request(child, request, addr, data);
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -513,7 +513,7 @@ give_sigsegv:
|
|||
}
|
||||
|
||||
/* Invoke a singal handler to, well, handle the signal. */
|
||||
extern inline void
|
||||
static inline void
|
||||
handle_signal(int canrestart, unsigned long sig,
|
||||
siginfo_t *info, struct k_sigaction *ka,
|
||||
sigset_t *oldset, struct pt_regs * regs)
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <asm/tlbflush.h>
|
||||
#include <asm/arch/memmap.h>
|
||||
|
||||
extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
|
||||
static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
|
||||
unsigned long phys_addr, pgprot_t prot)
|
||||
{
|
||||
unsigned long end;
|
||||
|
|
|
@ -106,48 +106,11 @@ void ptrace_enable(struct task_struct *child)
|
|||
child->thread.frame0->__status |= REG__STATUS_STEP;
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
unsigned long tmp;
|
||||
int ret;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* when I and D space are separate, these will need to be fixed. */
|
||||
case PTRACE_PEEKTEXT: /* read word at location addr. */
|
||||
|
@ -351,10 +314,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -57,43 +57,10 @@ void ptrace_disable(struct task_struct *child)
|
|||
h8300_disable_trace(child);
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
case PTRACE_PEEKTEXT: /* read word at location addr. */
|
||||
case PTRACE_PEEKDATA: {
|
||||
|
@ -251,10 +218,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -997,8 +997,21 @@ source "drivers/Kconfig"
|
|||
|
||||
source "fs/Kconfig"
|
||||
|
||||
menu "Instrumentation Support"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
source "arch/i386/oprofile/Kconfig"
|
||||
|
||||
config KPROBES
|
||||
bool "Kprobes (EXPERIMENTAL)"
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
a probepoint and specifies the callback. Kprobes is useful
|
||||
for kernel debugging, non-intrusive instrumentation and testing.
|
||||
If in doubt, say "N".
|
||||
endmenu
|
||||
|
||||
source "arch/i386/Kconfig.debug"
|
||||
|
||||
source "security/Kconfig"
|
||||
|
|
|
@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW
|
|||
This option will cause messages to be printed if free stack space
|
||||
drops below a certain limit.
|
||||
|
||||
config KPROBES
|
||||
bool "Kprobes"
|
||||
depends on DEBUG_KERNEL
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
a probepoint and specifies the callback. Kprobes is useful
|
||||
for kernel debugging, non-intrusive instrumentation and testing.
|
||||
If in doubt, say "N".
|
||||
|
||||
config DEBUG_STACK_USAGE
|
||||
bool "Stack utilization instrumentation"
|
||||
depends on DEBUG_KERNEL
|
||||
|
|
|
@ -559,14 +559,20 @@ void __devinit setup_local_APIC(void)
|
|||
* If Linux enabled the LAPIC against the BIOS default
|
||||
* disable it down before re-entering the BIOS on shutdown.
|
||||
* Otherwise the BIOS may get confused and not power-off.
|
||||
* Additionally clear all LVT entries before disable_local_APIC
|
||||
* for the case where Linux didn't enable the LAPIC.
|
||||
*/
|
||||
void lapic_shutdown(void)
|
||||
{
|
||||
if (!cpu_has_apic || !enabled_via_apicbase)
|
||||
if (!cpu_has_apic)
|
||||
return;
|
||||
|
||||
local_irq_disable();
|
||||
disable_local_APIC();
|
||||
clear_local_APIC();
|
||||
|
||||
if (enabled_via_apicbase)
|
||||
disable_local_APIC();
|
||||
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
|
|
|
@ -447,8 +447,7 @@ static char * apm_event_name[] = {
|
|||
"system standby resume",
|
||||
"capabilities change"
|
||||
};
|
||||
#define NR_APM_EVENT_NAME \
|
||||
(sizeof(apm_event_name) / sizeof(apm_event_name[0]))
|
||||
#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name)
|
||||
|
||||
typedef struct lookup_t {
|
||||
int key;
|
||||
|
@ -479,7 +478,7 @@ static const lookup_t error_table[] = {
|
|||
{ APM_NO_ERROR, "BIOS did not set a return code" },
|
||||
{ APM_NOT_PRESENT, "No APM present" }
|
||||
};
|
||||
#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
|
||||
#define ERROR_COUNT ARRAY_SIZE(error_table)
|
||||
|
||||
/**
|
||||
* apm_error - display an APM error
|
||||
|
|
|
@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1;
|
|||
|
||||
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
|
||||
|
||||
extern void mcheck_init(struct cpuinfo_x86 *c);
|
||||
|
||||
extern int disable_pse;
|
||||
|
||||
static void default_init(struct cpuinfo_x86 * c)
|
||||
|
@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
|
|||
}
|
||||
|
||||
/* Init Machine Check Exception if available. */
|
||||
#ifdef CONFIG_X86_MCE
|
||||
mcheck_init(c);
|
||||
#endif
|
||||
|
||||
if (c == &boot_cpu_data)
|
||||
sysenter_setup();
|
||||
enable_sep_cpu();
|
||||
|
|
|
@ -377,10 +377,9 @@ acpi_cpufreq_cpu_init (
|
|||
arg0.buffer.length = 12;
|
||||
arg0.buffer.pointer = (u8 *) arg0_buf;
|
||||
|
||||
data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
|
||||
data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
|
||||
if (!data)
|
||||
return (-ENOMEM);
|
||||
memset(data, 0, sizeof(struct cpufreq_acpi_io));
|
||||
|
||||
acpi_io_data[cpu] = data;
|
||||
|
||||
|
|
|
@ -171,10 +171,9 @@ static int get_ranges (unsigned char *pst)
|
|||
unsigned int speed;
|
||||
u8 fid, vid;
|
||||
|
||||
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
|
||||
powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
|
||||
if (!powernow_table)
|
||||
return -ENOMEM;
|
||||
memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
|
||||
|
||||
for (j=0 ; j < number_scales; j++) {
|
||||
fid = *pst++;
|
||||
|
@ -305,16 +304,13 @@ static int powernow_acpi_init(void)
|
|||
goto err0;
|
||||
}
|
||||
|
||||
acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
|
||||
acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!acpi_processor_perf) {
|
||||
retval = -ENOMEM;
|
||||
goto err0;
|
||||
}
|
||||
|
||||
memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
|
||||
|
||||
if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
|
||||
retval = -EIO;
|
||||
goto err1;
|
||||
|
@ -337,14 +333,12 @@ static int powernow_acpi_init(void)
|
|||
goto err2;
|
||||
}
|
||||
|
||||
powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
|
||||
powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
|
||||
if (!powernow_table) {
|
||||
retval = -ENOMEM;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
|
||||
|
||||
pc.val = (unsigned long) acpi_processor_perf->states[0].control;
|
||||
for (i = 0; i < number_scales; i++) {
|
||||
u8 fid, vid;
|
||||
|
|
|
@ -462,7 +462,6 @@ static int check_supported_cpu(unsigned int cpu)
|
|||
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(cpu));
|
||||
schedule();
|
||||
|
||||
if (smp_processor_id() != cpu) {
|
||||
printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
|
||||
|
@ -497,9 +496,7 @@ static int check_supported_cpu(unsigned int cpu)
|
|||
|
||||
out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
schedule();
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
|
||||
|
@ -913,7 +910,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
|
|||
/* only run on specific CPU from here on */
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
|
||||
schedule();
|
||||
|
||||
if (smp_processor_id() != pol->cpu) {
|
||||
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
|
||||
|
@ -968,8 +964,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
|
|||
|
||||
err_out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
schedule();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -991,12 +985,11 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
|
|||
if (!check_supported_cpu(pol->cpu))
|
||||
return -ENODEV;
|
||||
|
||||
data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
|
||||
data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
|
||||
if (!data) {
|
||||
printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(data,0,sizeof(struct powernow_k8_data));
|
||||
|
||||
data->cpu = pol->cpu;
|
||||
|
||||
|
@ -1026,7 +1019,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
|
|||
/* only run on specific CPU from here on */
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
|
||||
schedule();
|
||||
|
||||
if (smp_processor_id() != pol->cpu) {
|
||||
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
|
||||
|
@ -1045,7 +1037,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
|
|||
|
||||
/* run on any CPU again */
|
||||
set_cpus_allowed(current, oldmask);
|
||||
schedule();
|
||||
|
||||
pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
|
||||
pol->cpus = cpu_core_map[pol->cpu];
|
||||
|
@ -1080,7 +1071,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
|
|||
|
||||
err_out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
schedule();
|
||||
powernow_k8_cpu_exit_acpi(data);
|
||||
|
||||
kfree(data);
|
||||
|
@ -1116,17 +1106,14 @@ static unsigned int powernowk8_get (unsigned int cpu)
|
|||
set_cpus_allowed(current, oldmask);
|
||||
return 0;
|
||||
}
|
||||
preempt_disable();
|
||||
|
||||
|
||||
if (query_current_values_with_pending_wait(data))
|
||||
goto out;
|
||||
|
||||
khz = find_khz_freq_from_fid(data->currfid);
|
||||
|
||||
out:
|
||||
preempt_enable_no_resched();
|
||||
out:
|
||||
set_cpus_allowed(current, oldmask);
|
||||
|
||||
return khz;
|
||||
}
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ static const struct cpu_id cpu_ids[] = {
|
|||
[CPU_MP4HT_D0] = {15, 3, 4 },
|
||||
[CPU_MP4HT_E0] = {15, 4, 1 },
|
||||
};
|
||||
#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0]))
|
||||
#define N_IDS ARRAY_SIZE(cpu_ids)
|
||||
|
||||
struct cpu_model
|
||||
{
|
||||
|
@ -423,12 +423,11 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
|
|||
}
|
||||
}
|
||||
|
||||
centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
|
||||
centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL);
|
||||
if (!centrino_model[cpu]) {
|
||||
result = -ENOMEM;
|
||||
goto err_unreg;
|
||||
}
|
||||
memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
|
||||
|
||||
centrino_model[cpu]->model_name=NULL;
|
||||
centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
|
||||
|
|
|
@ -68,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
|
|||
|
||||
|
||||
/* AMD K7 machine check is Intel like */
|
||||
void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "mce.h"
|
||||
|
||||
int mce_disabled __devinitdata = 0;
|
||||
int mce_disabled = 0;
|
||||
int nr_mce_banks;
|
||||
|
||||
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
|
||||
|
@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
|
|||
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
|
||||
|
||||
/* This has to be run for each processor */
|
||||
void __devinit mcheck_init(struct cpuinfo_x86 *c)
|
||||
void mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mce_disabled==1)
|
||||
return;
|
||||
|
|
|
@ -77,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
|
|||
}
|
||||
|
||||
/* P4/Xeon Thermal regulation detect and init */
|
||||
static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
static void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
@ -231,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
}
|
||||
|
||||
|
||||
void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
|
|
@ -28,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
|
|||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE */
|
||||
void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE */
|
||||
void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
|
|
@ -22,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
|
|||
}
|
||||
|
||||
/* Set up machine check reporting on the Winchip C6 series */
|
||||
void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 lo, hi;
|
||||
machine_check_vector = winchip_machine_check;
|
||||
|
|
|
@ -31,22 +31,16 @@
|
|||
#include <linux/config.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/kdebug.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
static struct kprobe *current_kprobe;
|
||||
static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
|
||||
static struct kprobe *kprobe_prev;
|
||||
static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
|
||||
static struct pt_regs jprobe_saved_regs;
|
||||
static long *jprobe_saved_esp;
|
||||
/* copy of the kernel stack at the probe fire time */
|
||||
static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
|
||||
void jprobe_return_end(void);
|
||||
|
||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||
|
||||
/*
|
||||
* returns non-zero if opcode modifies the interrupt flag.
|
||||
*/
|
||||
|
@ -91,29 +85,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
|
|||
{
|
||||
}
|
||||
|
||||
static inline void save_previous_kprobe(void)
|
||||
static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
kprobe_prev = current_kprobe;
|
||||
kprobe_status_prev = kprobe_status;
|
||||
kprobe_old_eflags_prev = kprobe_old_eflags;
|
||||
kprobe_saved_eflags_prev = kprobe_saved_eflags;
|
||||
kcb->prev_kprobe.kp = kprobe_running();
|
||||
kcb->prev_kprobe.status = kcb->kprobe_status;
|
||||
kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
|
||||
kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
|
||||
}
|
||||
|
||||
static inline void restore_previous_kprobe(void)
|
||||
static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
current_kprobe = kprobe_prev;
|
||||
kprobe_status = kprobe_status_prev;
|
||||
kprobe_old_eflags = kprobe_old_eflags_prev;
|
||||
kprobe_saved_eflags = kprobe_saved_eflags_prev;
|
||||
__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
|
||||
kcb->kprobe_status = kcb->prev_kprobe.status;
|
||||
kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
|
||||
kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
|
||||
}
|
||||
|
||||
static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
|
||||
struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
current_kprobe = p;
|
||||
kprobe_saved_eflags = kprobe_old_eflags
|
||||
__get_cpu_var(current_kprobe) = p;
|
||||
kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
|
||||
= (regs->eflags & (TF_MASK | IF_MASK));
|
||||
if (is_IF_modifier(p->opcode))
|
||||
kprobe_saved_eflags &= ~IF_MASK;
|
||||
kcb->kprobe_saved_eflags &= ~IF_MASK;
|
||||
}
|
||||
|
||||
static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
|
||||
|
@ -127,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
|
|||
regs->eip = (unsigned long)&p->ainsn.insn;
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
|
@ -157,9 +153,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
|
|||
int ret = 0;
|
||||
kprobe_opcode_t *addr = NULL;
|
||||
unsigned long *lp;
|
||||
struct kprobe_ctlblk *kcb;
|
||||
|
||||
/* We're in an interrupt, but this is clear and BUG()-safe. */
|
||||
/*
|
||||
* We don't want to be preempted for the entire
|
||||
* duration of kprobe processing
|
||||
*/
|
||||
preempt_disable();
|
||||
kcb = get_kprobe_ctlblk();
|
||||
|
||||
/* Check if the application is using LDT entry for its code segment and
|
||||
* calculate the address by reading the base address from the LDT entry.
|
||||
*/
|
||||
|
@ -173,15 +175,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
|
|||
}
|
||||
/* Check we're not actually recursing */
|
||||
if (kprobe_running()) {
|
||||
/* We *are* holding lock here, so this is safe.
|
||||
Disarm the probe we just hit, and ignore it. */
|
||||
p = get_kprobe(addr);
|
||||
if (p) {
|
||||
if (kprobe_status == KPROBE_HIT_SS &&
|
||||
if (kcb->kprobe_status == KPROBE_HIT_SS &&
|
||||
*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
|
||||
regs->eflags &= ~TF_MASK;
|
||||
regs->eflags |= kprobe_saved_eflags;
|
||||
unlock_kprobes();
|
||||
regs->eflags |= kcb->kprobe_saved_eflags;
|
||||
goto no_kprobe;
|
||||
}
|
||||
/* We have reentered the kprobe_handler(), since
|
||||
|
@ -190,26 +189,23 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
|
|||
* just single step on the instruction of the new probe
|
||||
* without calling any user handlers.
|
||||
*/
|
||||
save_previous_kprobe();
|
||||
set_current_kprobe(p, regs);
|
||||
save_previous_kprobe(kcb);
|
||||
set_current_kprobe(p, regs, kcb);
|
||||
p->nmissed++;
|
||||
prepare_singlestep(p, regs);
|
||||
kprobe_status = KPROBE_REENTER;
|
||||
kcb->kprobe_status = KPROBE_REENTER;
|
||||
return 1;
|
||||
} else {
|
||||
p = current_kprobe;
|
||||
p = __get_cpu_var(current_kprobe);
|
||||
if (p->break_handler && p->break_handler(p, regs)) {
|
||||
goto ss_probe;
|
||||
}
|
||||
}
|
||||
/* If it's not ours, can't be delete race, (we hold lock). */
|
||||
goto no_kprobe;
|
||||
}
|
||||
|
||||
lock_kprobes();
|
||||
p = get_kprobe(addr);
|
||||
if (!p) {
|
||||
unlock_kprobes();
|
||||
if (regs->eflags & VM_MASK) {
|
||||
/* We are in virtual-8086 mode. Return 0 */
|
||||
goto no_kprobe;
|
||||
|
@ -232,8 +228,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
|
|||
goto no_kprobe;
|
||||
}
|
||||
|
||||
kprobe_status = KPROBE_HIT_ACTIVE;
|
||||
set_current_kprobe(p, regs);
|
||||
set_current_kprobe(p, regs, kcb);
|
||||
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
|
||||
|
||||
if (p->pre_handler && p->pre_handler(p, regs))
|
||||
/* handler has already set things up, so skip ss setup */
|
||||
|
@ -241,7 +237,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
|
|||
|
||||
ss_probe:
|
||||
prepare_singlestep(p, regs);
|
||||
kprobe_status = KPROBE_HIT_SS;
|
||||
kcb->kprobe_status = KPROBE_HIT_SS;
|
||||
return 1;
|
||||
|
||||
no_kprobe:
|
||||
|
@ -269,9 +265,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
struct kretprobe_instance *ri = NULL;
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node, *tmp;
|
||||
unsigned long orig_ret_address = 0;
|
||||
unsigned long flags, orig_ret_address = 0;
|
||||
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
|
||||
|
||||
spin_lock_irqsave(&kretprobe_lock, flags);
|
||||
head = kretprobe_inst_table_head(current);
|
||||
|
||||
/*
|
||||
|
@ -310,14 +307,15 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
|
||||
regs->eip = orig_ret_address;
|
||||
|
||||
unlock_kprobes();
|
||||
reset_current_kprobe();
|
||||
spin_unlock_irqrestore(&kretprobe_lock, flags);
|
||||
preempt_enable_no_resched();
|
||||
|
||||
/*
|
||||
* By returning a non-zero value, we are telling
|
||||
* kprobe_handler() that we have handled unlocking
|
||||
* and re-enabling preemption.
|
||||
*/
|
||||
/*
|
||||
* By returning a non-zero value, we are telling
|
||||
* kprobe_handler() that we don't want the post_handler
|
||||
* to run (and have re-enabled preemption)
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -343,7 +341,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
* that is atop the stack is the address following the copied instruction.
|
||||
* We need to make it the address following the original instruction.
|
||||
*/
|
||||
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
|
||||
static void __kprobes resume_execution(struct kprobe *p,
|
||||
struct pt_regs *regs, struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
unsigned long *tos = (unsigned long *)®s->esp;
|
||||
unsigned long next_eip = 0;
|
||||
|
@ -353,7 +352,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
|
|||
switch (p->ainsn.insn[0]) {
|
||||
case 0x9c: /* pushfl */
|
||||
*tos &= ~(TF_MASK | IF_MASK);
|
||||
*tos |= kprobe_old_eflags;
|
||||
*tos |= kcb->kprobe_old_eflags;
|
||||
break;
|
||||
case 0xc3: /* ret/lret */
|
||||
case 0xcb:
|
||||
|
@ -394,27 +393,30 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
|
|||
|
||||
/*
|
||||
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
|
||||
* remain disabled thoroughout this function. And we hold kprobe lock.
|
||||
* remain disabled thoroughout this function.
|
||||
*/
|
||||
static inline int post_kprobe_handler(struct pt_regs *regs)
|
||||
{
|
||||
if (!kprobe_running())
|
||||
struct kprobe *cur = kprobe_running();
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
if (!cur)
|
||||
return 0;
|
||||
|
||||
if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
|
||||
kprobe_status = KPROBE_HIT_SSDONE;
|
||||
current_kprobe->post_handler(current_kprobe, regs, 0);
|
||||
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
|
||||
kcb->kprobe_status = KPROBE_HIT_SSDONE;
|
||||
cur->post_handler(cur, regs, 0);
|
||||
}
|
||||
|
||||
resume_execution(current_kprobe, regs);
|
||||
regs->eflags |= kprobe_saved_eflags;
|
||||
resume_execution(cur, regs, kcb);
|
||||
regs->eflags |= kcb->kprobe_saved_eflags;
|
||||
|
||||
/*Restore back the original saved kprobes variables and continue. */
|
||||
if (kprobe_status == KPROBE_REENTER) {
|
||||
restore_previous_kprobe();
|
||||
if (kcb->kprobe_status == KPROBE_REENTER) {
|
||||
restore_previous_kprobe(kcb);
|
||||
goto out;
|
||||
}
|
||||
unlock_kprobes();
|
||||
reset_current_kprobe();
|
||||
out:
|
||||
preempt_enable_no_resched();
|
||||
|
||||
|
@ -429,18 +431,19 @@ out:
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Interrupts disabled, kprobe_lock held. */
|
||||
static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
|
||||
{
|
||||
if (current_kprobe->fault_handler
|
||||
&& current_kprobe->fault_handler(current_kprobe, regs, trapnr))
|
||||
struct kprobe *cur = kprobe_running();
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
|
||||
return 1;
|
||||
|
||||
if (kprobe_status & KPROBE_HIT_SS) {
|
||||
resume_execution(current_kprobe, regs);
|
||||
regs->eflags |= kprobe_old_eflags;
|
||||
if (kcb->kprobe_status & KPROBE_HIT_SS) {
|
||||
resume_execution(cur, regs, kcb);
|
||||
regs->eflags |= kcb->kprobe_old_eflags;
|
||||
|
||||
unlock_kprobes();
|
||||
reset_current_kprobe();
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
return 0;
|
||||
|
@ -453,39 +456,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
|||
unsigned long val, void *data)
|
||||
{
|
||||
struct die_args *args = (struct die_args *)data;
|
||||
int ret = NOTIFY_DONE;
|
||||
|
||||
switch (val) {
|
||||
case DIE_INT3:
|
||||
if (kprobe_handler(args->regs))
|
||||
return NOTIFY_STOP;
|
||||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_DEBUG:
|
||||
if (post_kprobe_handler(args->regs))
|
||||
return NOTIFY_STOP;
|
||||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_GPF:
|
||||
if (kprobe_running() &&
|
||||
kprobe_fault_handler(args->regs, args->trapnr))
|
||||
return NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_PAGE_FAULT:
|
||||
/* kprobe_running() needs smp_processor_id() */
|
||||
preempt_disable();
|
||||
if (kprobe_running() &&
|
||||
kprobe_fault_handler(args->regs, args->trapnr))
|
||||
return NOTIFY_STOP;
|
||||
ret = NOTIFY_STOP;
|
||||
preempt_enable();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct jprobe *jp = container_of(p, struct jprobe, kp);
|
||||
unsigned long addr;
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
jprobe_saved_regs = *regs;
|
||||
jprobe_saved_esp = ®s->esp;
|
||||
addr = (unsigned long)jprobe_saved_esp;
|
||||
kcb->jprobe_saved_regs = *regs;
|
||||
kcb->jprobe_saved_esp = ®s->esp;
|
||||
addr = (unsigned long)(kcb->jprobe_saved_esp);
|
||||
|
||||
/*
|
||||
* TBD: As Linus pointed out, gcc assumes that the callee
|
||||
|
@ -494,7 +499,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
* we also save and restore enough stack bytes to cover
|
||||
* the argument area.
|
||||
*/
|
||||
memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
|
||||
memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
|
||||
MIN_STACK_SIZE(addr));
|
||||
regs->eflags &= ~IF_MASK;
|
||||
regs->eip = (unsigned long)(jp->entry);
|
||||
return 1;
|
||||
|
@ -502,36 +508,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
|
||||
void __kprobes jprobe_return(void)
|
||||
{
|
||||
preempt_enable_no_resched();
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
asm volatile (" xchgl %%ebx,%%esp \n"
|
||||
" int3 \n"
|
||||
" .globl jprobe_return_end \n"
|
||||
" jprobe_return_end: \n"
|
||||
" nop \n"::"b"
|
||||
(jprobe_saved_esp):"memory");
|
||||
(kcb->jprobe_saved_esp):"memory");
|
||||
}
|
||||
|
||||
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
u8 *addr = (u8 *) (regs->eip - 1);
|
||||
unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
|
||||
unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
|
||||
struct jprobe *jp = container_of(p, struct jprobe, kp);
|
||||
|
||||
if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
|
||||
if (®s->esp != jprobe_saved_esp) {
|
||||
if (®s->esp != kcb->jprobe_saved_esp) {
|
||||
struct pt_regs *saved_regs =
|
||||
container_of(jprobe_saved_esp, struct pt_regs, esp);
|
||||
container_of(kcb->jprobe_saved_esp,
|
||||
struct pt_regs, esp);
|
||||
printk("current esp %p does not match saved esp %p\n",
|
||||
®s->esp, jprobe_saved_esp);
|
||||
®s->esp, kcb->jprobe_saved_esp);
|
||||
printk("Saved registers for jprobe %p\n", jp);
|
||||
show_registers(saved_regs);
|
||||
printk("Current registers\n");
|
||||
show_registers(regs);
|
||||
BUG();
|
||||
}
|
||||
*regs = jprobe_saved_regs;
|
||||
memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
|
||||
*regs = kcb->jprobe_saved_regs;
|
||||
memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
|
||||
MIN_STACK_SIZE(stack_addr));
|
||||
preempt_enable_no_resched();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/system.h>
|
||||
#include <asm/ldt.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
|
||||
static void flush_ldt(void *null)
|
||||
|
|
|
@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = {
|
|||
{ .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
|
||||
};
|
||||
|
||||
#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
|
||||
#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
|
||||
|
||||
/**
|
||||
* mca_read_and_store_pos - read the POS registers into a memory buffer
|
||||
|
|
|
@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child,
|
|||
return 0;
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
struct user * dummy = NULL;
|
||||
int i, ret;
|
||||
unsigned long __user *datap = (unsigned long __user *)data;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
ret = security_ptrace(current->parent, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* when I and D space are separate, these will need to be fixed. */
|
||||
case PTRACE_PEEKTEXT: /* read word at location addr. */
|
||||
|
@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = ptrace_request(child, request, addr, data);
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
out_tsk:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include <asm/delay.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/reboot_fixups.h>
|
||||
|
||||
static void cs5530a_warm_reset(struct pci_dev *dev)
|
||||
{
|
||||
|
@ -42,7 +43,7 @@ void mach_reboot_fixups(void)
|
|||
struct pci_dev *dev;
|
||||
int i;
|
||||
|
||||
for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) {
|
||||
for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
|
||||
cur = &(fixups_table[i]);
|
||||
dev = pci_get_device(cur->vendor, cur->device, NULL);
|
||||
if (!dev)
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <linux/pci.h>
|
||||
|
||||
#include <linux/scx200.h>
|
||||
#include <linux/scx200_gpio.h>
|
||||
|
||||
/* Verify that the configuration block really is there */
|
||||
#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
|
||||
|
|
|
@ -68,11 +68,9 @@ EXPORT_SYMBOL(smp_num_siblings);
|
|||
|
||||
/* Package ID of each logical CPU */
|
||||
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
|
||||
EXPORT_SYMBOL(phys_proc_id);
|
||||
|
||||
/* Core ID of each logical CPU */
|
||||
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
|
||||
EXPORT_SYMBOL(cpu_core_id);
|
||||
|
||||
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
|
||||
EXPORT_SYMBOL(cpu_sibling_map);
|
||||
|
@ -612,7 +610,7 @@ static inline void __inquire_remote_apic(int apicid)
|
|||
|
||||
printk("Inquiring remote APIC #%d...\n", apicid);
|
||||
|
||||
for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
|
||||
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
printk("... APIC #%d %s: ", apicid, names[i]);
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,3 @@
|
|||
|
||||
menu "Profiling support"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
config PROFILING
|
||||
bool "Profiling support (EXPERIMENTAL)"
|
||||
help
|
||||
|
@ -19,5 +15,3 @@ config OPROFILE
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
endmenu
|
||||
|
||||
|
|
|
@ -118,6 +118,7 @@ void __restore_processor_state(struct saved_context *ctxt)
|
|||
fix_processor_context();
|
||||
do_fpu_end();
|
||||
mtrr_ap_init();
|
||||
mcheck_init(&boot_cpu_data);
|
||||
}
|
||||
|
||||
void restore_processor_state(void)
|
||||
|
|
|
@ -191,6 +191,7 @@ config IOSAPIC
|
|||
|
||||
config IA64_SGI_SN_XP
|
||||
tristate "Support communication between SGI SSIs"
|
||||
depends on IA64_GENERIC || IA64_SGI_SN2
|
||||
select IA64_UNCACHED_ALLOCATOR
|
||||
help
|
||||
An SGI machine can be divided into multiple Single System
|
||||
|
@ -426,8 +427,21 @@ config GENERIC_PENDING_IRQ
|
|||
|
||||
source "arch/ia64/hp/sim/Kconfig"
|
||||
|
||||
menu "Instrumentation Support"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
source "arch/ia64/oprofile/Kconfig"
|
||||
|
||||
config KPROBES
|
||||
bool "Kprobes (EXPERIMENTAL)"
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
a probepoint and specifies the callback. Kprobes is useful
|
||||
for kernel debugging, non-intrusive instrumentation and testing.
|
||||
If in doubt, say "N".
|
||||
endmenu
|
||||
|
||||
source "arch/ia64/Kconfig.debug"
|
||||
|
||||
source "security/Kconfig"
|
||||
|
|
|
@ -2,17 +2,6 @@ menu "Kernel hacking"
|
|||
|
||||
source "lib/Kconfig.debug"
|
||||
|
||||
config KPROBES
|
||||
bool "Kprobes"
|
||||
depends on DEBUG_KERNEL
|
||||
help
|
||||
Kprobes allows you to trap at almost any kernel address and
|
||||
execute a callback function. register_kprobe() establishes
|
||||
a probepoint and specifies the callback. Kprobes is useful
|
||||
for kernel debugging, non-intrusive instrumentation and testing.
|
||||
If in doubt, say "N".
|
||||
|
||||
|
||||
choice
|
||||
prompt "Physical memory granularity"
|
||||
default IA64_GRANULE_64MB
|
||||
|
|
|
@ -642,10 +642,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
|
|||
info->event = 0;
|
||||
info->tty = 0;
|
||||
if (info->blocked_open) {
|
||||
if (info->close_delay) {
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
schedule_timeout(info->close_delay);
|
||||
}
|
||||
if (info->close_delay)
|
||||
schedule_timeout_interruptible(info->close_delay);
|
||||
wake_up_interruptible(&info->open_wait);
|
||||
}
|
||||
info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include <linux/config.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/preempt.h>
|
||||
|
@ -38,13 +37,8 @@
|
|||
|
||||
extern void jprobe_inst_return(void);
|
||||
|
||||
/* kprobe_status settings */
|
||||
#define KPROBE_HIT_ACTIVE 0x00000001
|
||||
#define KPROBE_HIT_SS 0x00000002
|
||||
|
||||
static struct kprobe *current_kprobe, *kprobe_prev;
|
||||
static unsigned long kprobe_status, kprobe_status_prev;
|
||||
static struct pt_regs jprobe_saved_regs;
|
||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||
|
||||
enum instruction_type {A, I, M, F, B, L, X, u};
|
||||
static enum instruction_type bundle_encoding[32][3] = {
|
||||
|
@ -313,21 +307,22 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline void save_previous_kprobe(void)
|
||||
static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
kprobe_prev = current_kprobe;
|
||||
kprobe_status_prev = kprobe_status;
|
||||
kcb->prev_kprobe.kp = kprobe_running();
|
||||
kcb->prev_kprobe.status = kcb->kprobe_status;
|
||||
}
|
||||
|
||||
static inline void restore_previous_kprobe(void)
|
||||
static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
current_kprobe = kprobe_prev;
|
||||
kprobe_status = kprobe_status_prev;
|
||||
__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
|
||||
kcb->kprobe_status = kcb->prev_kprobe.status;
|
||||
}
|
||||
|
||||
static inline void set_current_kprobe(struct kprobe *p)
|
||||
static inline void set_current_kprobe(struct kprobe *p,
|
||||
struct kprobe_ctlblk *kcb)
|
||||
{
|
||||
current_kprobe = p;
|
||||
__get_cpu_var(current_kprobe) = p;
|
||||
}
|
||||
|
||||
static void kretprobe_trampoline(void)
|
||||
|
@ -347,10 +342,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
struct kretprobe_instance *ri = NULL;
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node, *tmp;
|
||||
unsigned long orig_ret_address = 0;
|
||||
unsigned long flags, orig_ret_address = 0;
|
||||
unsigned long trampoline_address =
|
||||
((struct fnptr *)kretprobe_trampoline)->ip;
|
||||
|
||||
spin_lock_irqsave(&kretprobe_lock, flags);
|
||||
head = kretprobe_inst_table_head(current);
|
||||
|
||||
/*
|
||||
|
@ -389,17 +385,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
|
||||
regs->cr_iip = orig_ret_address;
|
||||
|
||||
unlock_kprobes();
|
||||
reset_current_kprobe();
|
||||
spin_unlock_irqrestore(&kretprobe_lock, flags);
|
||||
preempt_enable_no_resched();
|
||||
|
||||
/*
|
||||
* By returning a non-zero value, we are telling
|
||||
* kprobe_handler() that we have handled unlocking
|
||||
* and re-enabling preemption.
|
||||
*/
|
||||
/*
|
||||
* By returning a non-zero value, we are telling
|
||||
* kprobe_handler() that we don't want the post_handler
|
||||
* to run (and have re-enabled preemption)
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Called with kretprobe_lock held */
|
||||
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
|
@ -606,17 +604,22 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
|
|||
int ret = 0;
|
||||
struct pt_regs *regs = args->regs;
|
||||
kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
|
||||
struct kprobe_ctlblk *kcb;
|
||||
|
||||
/*
|
||||
* We don't want to be preempted for the entire
|
||||
* duration of kprobe processing
|
||||
*/
|
||||
preempt_disable();
|
||||
kcb = get_kprobe_ctlblk();
|
||||
|
||||
/* Handle recursion cases */
|
||||
if (kprobe_running()) {
|
||||
p = get_kprobe(addr);
|
||||
if (p) {
|
||||
if ( (kprobe_status == KPROBE_HIT_SS) &&
|
||||
if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
|
||||
(p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
|
||||
ia64_psr(regs)->ss = 0;
|
||||
unlock_kprobes();
|
||||
goto no_kprobe;
|
||||
}
|
||||
/* We have reentered the pre_kprobe_handler(), since
|
||||
|
@ -625,17 +628,17 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
|
|||
* just single step on the instruction of the new probe
|
||||
* without calling any user handlers.
|
||||
*/
|
||||
save_previous_kprobe();
|
||||
set_current_kprobe(p);
|
||||
save_previous_kprobe(kcb);
|
||||
set_current_kprobe(p, kcb);
|
||||
p->nmissed++;
|
||||
prepare_ss(p, regs);
|
||||
kprobe_status = KPROBE_REENTER;
|
||||
kcb->kprobe_status = KPROBE_REENTER;
|
||||
return 1;
|
||||
} else if (args->err == __IA64_BREAK_JPROBE) {
|
||||
/*
|
||||
* jprobe instrumented function just completed
|
||||
*/
|
||||
p = current_kprobe;
|
||||
p = __get_cpu_var(current_kprobe);
|
||||
if (p->break_handler && p->break_handler(p, regs)) {
|
||||
goto ss_probe;
|
||||
}
|
||||
|
@ -645,10 +648,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
|
|||
}
|
||||
}
|
||||
|
||||
lock_kprobes();
|
||||
p = get_kprobe(addr);
|
||||
if (!p) {
|
||||
unlock_kprobes();
|
||||
if (!is_ia64_break_inst(regs)) {
|
||||
/*
|
||||
* The breakpoint instruction was removed right
|
||||
|
@ -665,8 +666,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
|
|||
goto no_kprobe;
|
||||
}
|
||||
|
||||
kprobe_status = KPROBE_HIT_ACTIVE;
|
||||
set_current_kprobe(p);
|
||||
set_current_kprobe(p, kcb);
|
||||
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
|
||||
|
||||
if (p->pre_handler && p->pre_handler(p, regs))
|
||||
/*
|
||||
|
@ -678,7 +679,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
|
|||
|
||||
ss_probe:
|
||||
prepare_ss(p, regs);
|
||||
kprobe_status = KPROBE_HIT_SS;
|
||||
kcb->kprobe_status = KPROBE_HIT_SS;
|
||||
return 1;
|
||||
|
||||
no_kprobe:
|
||||
|
@ -688,23 +689,25 @@ no_kprobe:
|
|||
|
||||
static int __kprobes post_kprobes_handler(struct pt_regs *regs)
|
||||
{
|
||||
if (!kprobe_running())
|
||||
struct kprobe *cur = kprobe_running();
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
if (!cur)
|
||||
return 0;
|
||||
|
||||
if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
|
||||
kprobe_status = KPROBE_HIT_SSDONE;
|
||||
current_kprobe->post_handler(current_kprobe, regs, 0);
|
||||
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
|
||||
kcb->kprobe_status = KPROBE_HIT_SSDONE;
|
||||
cur->post_handler(cur, regs, 0);
|
||||
}
|
||||
|
||||
resume_execution(current_kprobe, regs);
|
||||
resume_execution(cur, regs);
|
||||
|
||||
/*Restore back the original saved kprobes variables and continue. */
|
||||
if (kprobe_status == KPROBE_REENTER) {
|
||||
restore_previous_kprobe();
|
||||
if (kcb->kprobe_status == KPROBE_REENTER) {
|
||||
restore_previous_kprobe(kcb);
|
||||
goto out;
|
||||
}
|
||||
|
||||
unlock_kprobes();
|
||||
reset_current_kprobe();
|
||||
|
||||
out:
|
||||
preempt_enable_no_resched();
|
||||
|
@ -713,16 +716,15 @@ out:
|
|||
|
||||
static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
|
||||
{
|
||||
if (!kprobe_running())
|
||||
return 0;
|
||||
struct kprobe *cur = kprobe_running();
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
if (current_kprobe->fault_handler &&
|
||||
current_kprobe->fault_handler(current_kprobe, regs, trapnr))
|
||||
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
|
||||
return 1;
|
||||
|
||||
if (kprobe_status & KPROBE_HIT_SS) {
|
||||
resume_execution(current_kprobe, regs);
|
||||
unlock_kprobes();
|
||||
if (kcb->kprobe_status & KPROBE_HIT_SS) {
|
||||
resume_execution(cur, regs);
|
||||
reset_current_kprobe();
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
|
@ -733,31 +735,38 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
|||
unsigned long val, void *data)
|
||||
{
|
||||
struct die_args *args = (struct die_args *)data;
|
||||
int ret = NOTIFY_DONE;
|
||||
|
||||
switch(val) {
|
||||
case DIE_BREAK:
|
||||
if (pre_kprobes_handler(args))
|
||||
return NOTIFY_STOP;
|
||||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_SS:
|
||||
if (post_kprobes_handler(args->regs))
|
||||
return NOTIFY_STOP;
|
||||
ret = NOTIFY_STOP;
|
||||
break;
|
||||
case DIE_PAGE_FAULT:
|
||||
if (kprobes_fault_handler(args->regs, args->trapnr))
|
||||
return NOTIFY_STOP;
|
||||
/* kprobe_running() needs smp_processor_id() */
|
||||
preempt_disable();
|
||||
if (kprobe_running() &&
|
||||
kprobes_fault_handler(args->regs, args->trapnr))
|
||||
ret = NOTIFY_STOP;
|
||||
preempt_enable();
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct jprobe *jp = container_of(p, struct jprobe, kp);
|
||||
unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
/* save architectural state */
|
||||
jprobe_saved_regs = *regs;
|
||||
kcb->jprobe_saved_regs = *regs;
|
||||
|
||||
/* after rfi, execute the jprobe instrumented function */
|
||||
regs->cr_iip = addr & ~0xFULL;
|
||||
|
@ -775,7 +784,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
|||
|
||||
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
*regs = jprobe_saved_regs;
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
|
||||
*regs = kcb->jprobe_saved_regs;
|
||||
preempt_enable_no_resched();
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -4940,7 +4940,7 @@ abort_locked:
|
|||
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
|
||||
|
||||
error_args:
|
||||
if (args_k) kfree(args_k);
|
||||
kfree(args_k);
|
||||
|
||||
DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
|
||||
|
||||
|
|
|
@ -92,6 +92,13 @@ extern void efi_initialize_iomem_resources(struct resource *,
|
|||
extern char _text[], _end[], _etext[];
|
||||
|
||||
unsigned long ia64_max_cacheline_size;
|
||||
|
||||
int dma_get_cache_alignment(void)
|
||||
{
|
||||
return ia64_max_cacheline_size;
|
||||
}
|
||||
EXPORT_SYMBOL(dma_get_cache_alignment);
|
||||
|
||||
unsigned long ia64_iobase; /* virtual address for I/O accesses */
|
||||
EXPORT_SYMBOL(ia64_iobase);
|
||||
struct io_space io_space[MAX_IO_SPACES];
|
||||
|
|
|
@ -1,7 +1,3 @@
|
|||
|
||||
menu "Profiling support"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
config PROFILING
|
||||
bool "Profiling support (EXPERIMENTAL)"
|
||||
help
|
||||
|
@ -22,5 +18,3 @@ config OPROFILE
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
endmenu
|
||||
|
||||
|
|
|
@ -212,10 +212,8 @@ int atari_tt_hwclk( int op, struct rtc_time *t )
|
|||
* additionally the RTC_SET bit is set to prevent an update cycle.
|
||||
*/
|
||||
|
||||
while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP ) {
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
schedule_timeout(HWCLK_POLL_INTERVAL);
|
||||
}
|
||||
while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP )
|
||||
schedule_timeout_interruptible(HWCLK_POLL_INTERVAL);
|
||||
|
||||
local_irq_save(flags);
|
||||
RTC_WRITE( RTC_CONTROL, ctrl | RTC_SET );
|
||||
|
|
|
@ -121,48 +121,11 @@ void ptrace_disable(struct task_struct *child)
|
|||
child->thread.work.syscall_trace = 0;
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
unsigned long tmp;
|
||||
int i, ret = 0;
|
||||
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
goto out;
|
||||
}
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (unlikely(!child)) {
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* you may not mess with init */
|
||||
if (unlikely(pid == 1)) {
|
||||
ret = -EPERM;
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* when I and D space are separate, these will need to be fixed. */
|
||||
case PTRACE_PEEKTEXT: /* read word at location addr. */
|
||||
|
@ -317,14 +280,10 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = ptrace_request(child, request, addr, data);
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
|
||||
return ret;
|
||||
out_eio:
|
||||
ret = -EIO;
|
||||
goto out_tsk;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
asmlinkage void syscall_trace(void)
|
||||
|
|
|
@ -71,6 +71,11 @@ config M5206e
|
|||
help
|
||||
Motorola ColdFire 5206e processor support.
|
||||
|
||||
config M520x
|
||||
bool "MCF520x"
|
||||
help
|
||||
Freescale Coldfire 5207/5208 processor support.
|
||||
|
||||
config M523x
|
||||
bool "MCF523x"
|
||||
help
|
||||
|
@ -120,7 +125,7 @@ config M527x
|
|||
|
||||
config COLDFIRE
|
||||
bool
|
||||
depends on (M5206 || M5206e || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
|
||||
depends on (M5206 || M5206e || M520x || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
|
||||
default y
|
||||
|
||||
choice
|
||||
|
@ -322,6 +327,12 @@ config ELITE
|
|||
help
|
||||
Support for the Motorola M5206eLITE board.
|
||||
|
||||
config M5208EVB
|
||||
bool "Freescale M5208EVB board support"
|
||||
depends on M520x
|
||||
help
|
||||
Support for the Freescale Coldfire M5208EVB.
|
||||
|
||||
config M5235EVB
|
||||
bool "Freescale M5235EVB support"
|
||||
depends on M523x
|
||||
|
@ -465,10 +476,10 @@ config ARNEWSH
|
|||
default y
|
||||
depends on (ARN5206 || ARN5307)
|
||||
|
||||
config MOTOROLA
|
||||
config FREESCALE
|
||||
bool
|
||||
default y
|
||||
depends on (M5206eC3 || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
|
||||
depends on (M5206eC3 || M5208EVB || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
|
||||
|
||||
config HW_FEITH
|
||||
bool
|
||||
|
|
|
@ -14,6 +14,7 @@ platform-$(CONFIG_M68VZ328) := 68VZ328
|
|||
platform-$(CONFIG_M68360) := 68360
|
||||
platform-$(CONFIG_M5206) := 5206
|
||||
platform-$(CONFIG_M5206e) := 5206e
|
||||
platform-$(CONFIG_M520x) := 520x
|
||||
platform-$(CONFIG_M523x) := 523x
|
||||
platform-$(CONFIG_M5249) := 5249
|
||||
platform-$(CONFIG_M527x) := 527x
|
||||
|
@ -29,7 +30,7 @@ board-$(CONFIG_UCDIMM) := ucdimm
|
|||
board-$(CONFIG_UCQUICC) := uCquicc
|
||||
board-$(CONFIG_DRAGEN2) := de2
|
||||
board-$(CONFIG_ARNEWSH) := ARNEWSH
|
||||
board-$(CONFIG_MOTOROLA) := MOTOROLA
|
||||
board-$(CONFIG_FREESCALE) := FREESCALE
|
||||
board-$(CONFIG_M5235EVB) := M5235EVB
|
||||
board-$(CONFIG_M5271EVB) := M5271EVB
|
||||
board-$(CONFIG_M5275EVB) := M5275EVB
|
||||
|
@ -41,6 +42,7 @@ board-$(CONFIG_SECUREEDGEMP3) := MP3
|
|||
board-$(CONFIG_CLEOPATRA) := CLEOPATRA
|
||||
board-$(CONFIG_senTec) := senTec
|
||||
board-$(CONFIG_SNEHA) := SNEHA
|
||||
board-$(CONFIG_M5208EVB) := M5208EVB
|
||||
board-$(CONFIG_MOD5272) := MOD5272
|
||||
BOARD := $(board-y)
|
||||
|
||||
|
@ -56,6 +58,7 @@ MODEL := $(model-y)
|
|||
#
|
||||
cpuclass-$(CONFIG_M5206) := 5307
|
||||
cpuclass-$(CONFIG_M5206e) := 5307
|
||||
cpuclass-$(CONFIG_M520x) := 5307
|
||||
cpuclass-$(CONFIG_M523x) := 5307
|
||||
cpuclass-$(CONFIG_M5249) := 5307
|
||||
cpuclass-$(CONFIG_M527x) := 5307
|
||||
|
@ -80,6 +83,7 @@ export PLATFORM BOARD MODEL CPUCLASS
|
|||
#
|
||||
cflags-$(CONFIG_M5206) := -m5200 -Wa,-S -Wa,-m5200
|
||||
cflags-$(CONFIG_M5206e) := -m5200 -Wa,-S -Wa,-m5200
|
||||
cflags-$(CONFIG_M520x) := -m5307 -Wa,-S -Wa,-m5307
|
||||
cflags-$(CONFIG_M523x) := -m5307 -Wa,-S -Wa,-m5307
|
||||
cflags-$(CONFIG_M5249) := -m5200 -Wa,-S -Wa,-m5200
|
||||
cflags-$(CONFIG_M527x) := -m5307 -Wa,-S -Wa,-m5307
|
||||
|
@ -95,7 +99,6 @@ cflags-$(CONFIG_M68360) := -m68332
|
|||
AFLAGS += $(cflags-y)
|
||||
|
||||
CFLAGS += $(cflags-y)
|
||||
CFLAGS += -fno-builtin
|
||||
CFLAGS += -O1 -g
|
||||
CFLAGS += -D__linux__
|
||||
CFLAGS += -DUTS_SYSNAME=\"uClinux\"
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <linux/hardirq.h>
|
||||
#include <asm/bootinfo.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/irqnode.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#define DEFINE(sym, val) \
|
||||
|
|
|
@ -101,43 +101,10 @@ void ptrace_disable(struct task_struct *child)
|
|||
put_reg(child, PT_SR, tmp);
|
||||
}
|
||||
|
||||
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
||||
long arch_ptrace(truct task_struct *child, long request, long addr, long data)
|
||||
{
|
||||
struct task_struct *child;
|
||||
int ret;
|
||||
|
||||
lock_kernel();
|
||||
ret = -EPERM;
|
||||
if (request == PTRACE_TRACEME) {
|
||||
/* are we already being traced? */
|
||||
if (current->ptrace & PT_PTRACED)
|
||||
goto out;
|
||||
/* set the ptrace bit in the process flags. */
|
||||
current->ptrace |= PT_PTRACED;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = -ESRCH;
|
||||
read_lock(&tasklist_lock);
|
||||
child = find_task_by_pid(pid);
|
||||
if (child)
|
||||
get_task_struct(child);
|
||||
read_unlock(&tasklist_lock);
|
||||
if (!child)
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (pid == 1) /* you may not mess with init */
|
||||
goto out_tsk;
|
||||
|
||||
if (request == PTRACE_ATTACH) {
|
||||
ret = ptrace_attach(child);
|
||||
goto out_tsk;
|
||||
}
|
||||
ret = ptrace_check_attach(child, request == PTRACE_KILL);
|
||||
if (ret < 0)
|
||||
goto out_tsk;
|
||||
|
||||
switch (request) {
|
||||
/* when I and D space are separate, these will need to be fixed. */
|
||||
case PTRACE_PEEKTEXT: /* read word at location addr. */
|
||||
|
@ -357,10 +324,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
|
|||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
out_tsk:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -107,6 +107,9 @@ void (*mach_power_off)( void ) = NULL;
|
|||
#if defined(CONFIG_M5206e)
|
||||
#define CPU "COLDFIRE(m5206e)"
|
||||
#endif
|
||||
#if defined(CONFIG_M520x)
|
||||
#define CPU "COLDFIRE(m520x)"
|
||||
#endif
|
||||
#if defined(CONFIG_M523x)
|
||||
#define CPU "COLDFIRE(m523x)"
|
||||
#endif
|
||||
|
@ -132,7 +135,7 @@ void (*mach_power_off)( void ) = NULL;
|
|||
#define CPU "COLDFIRE(m5407)"
|
||||
#endif
|
||||
#ifndef CPU
|
||||
#define CPU "UNKOWN"
|
||||
#define CPU "UNKNOWN"
|
||||
#endif
|
||||
|
||||
/* (es) */
|
||||
|
|
|
@ -124,6 +124,14 @@
|
|||
#define RAM_LENGTH 0x3e0000
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The Freescale 5208EVB board has 32MB of RAM.
|
||||
*/
|
||||
#if defined(CONFIG_M5208EVB)
|
||||
#define RAM_START 0x40020000
|
||||
#define RAM_LENGTH 0x01e00000
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The senTec COBRA5272 board has nearly the same memory layout as
|
||||
* the M5272C3. We assume 16MiB ram.
|
||||
|
@ -275,6 +283,7 @@ SECTIONS {
|
|||
*(__ksymtab_strings)
|
||||
|
||||
/* Built-in module parameters */
|
||||
. = ALIGN(4) ;
|
||||
__start___param = .;
|
||||
*(__param)
|
||||
__stop___param = .;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue