Merge branches 'topic/fix/asoc', 'topic/fix/hda', 'topic/fix/misc' and 'topic/pci-ioremap-bar' into for-linus
This commit is contained in:
commit
0a9b86381c
1
.mailmap
1
.mailmap
|
@ -66,6 +66,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||||
Koushik <raghavendra.koushik@neterion.com>
|
Koushik <raghavendra.koushik@neterion.com>
|
||||||
Leonid I Ananiev <leonid.i.ananiev@intel.com>
|
Leonid I Ananiev <leonid.i.ananiev@intel.com>
|
||||||
Linas Vepstas <linas@austin.ibm.com>
|
Linas Vepstas <linas@austin.ibm.com>
|
||||||
|
Mark Brown <broonie@sirena.org.uk>
|
||||||
Matthieu CASTET <castet.matthieu@free.fr>
|
Matthieu CASTET <castet.matthieu@free.fr>
|
||||||
Michael Buesch <mb@bu3sch.de>
|
Michael Buesch <mb@bu3sch.de>
|
||||||
Michael Buesch <mbuesch@freenet.de>
|
Michael Buesch <mbuesch@freenet.de>
|
||||||
|
|
12
CREDITS
12
CREDITS
|
@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
|
||||||
S: USA
|
S: USA
|
||||||
|
|
||||||
N: Dave Jones
|
N: Dave Jones
|
||||||
E: davej@codemonkey.org.uk
|
E: davej@redhat.com
|
||||||
W: http://www.codemonkey.org.uk
|
W: http://www.codemonkey.org.uk
|
||||||
D: x86 errata/setup maintenance.
|
D: Assorted VIA x86 support.
|
||||||
D: AGPGART driver.
|
D: 2.5 AGPGART overhaul.
|
||||||
D: CPUFREQ maintenance.
|
D: CPUFREQ maintenance.
|
||||||
D: Backport/Forwardport merge monkey.
|
D: Fedora kernel maintenance.
|
||||||
D: Various Janitor work.
|
D: Misc/Other.
|
||||||
S: United Kingdom
|
S: 314 Littleton Rd, Westford, MA 01886, USA
|
||||||
|
|
||||||
N: Martin Josfsson
|
N: Martin Josfsson
|
||||||
E: gandalf@wlug.westbo.se
|
E: gandalf@wlug.westbo.se
|
||||||
|
|
|
@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Function names as strings (__FUNCTION__).
|
Function names as strings (__func__).
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
|
|
|
@ -236,10 +236,8 @@ software system can set different pages for controlling accesses to the
|
||||||
MSI-X structure. The implementation of MSI support requires the PCI
|
MSI-X structure. The implementation of MSI support requires the PCI
|
||||||
subsystem, not a device driver, to maintain full control of the MSI-X
|
subsystem, not a device driver, to maintain full control of the MSI-X
|
||||||
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
|
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
|
||||||
table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
|
table/MSI-X PBA. A device driver should not access the MMIO address
|
||||||
address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
|
space of the MSI-X table/MSI-X PBA.
|
||||||
will fail enabling MSI-X on its hardware device when it calls the function
|
|
||||||
pci_enable_msix().
|
|
||||||
|
|
||||||
5.3.2 API pci_enable_msix
|
5.3.2 API pci_enable_msix
|
||||||
|
|
||||||
|
|
|
@ -163,6 +163,10 @@ need pass only as many optional fields as necessary:
|
||||||
o class and classmask fields default to 0
|
o class and classmask fields default to 0
|
||||||
o driver_data defaults to 0UL.
|
o driver_data defaults to 0UL.
|
||||||
|
|
||||||
|
Note that driver_data must match the value used by any of the pci_device_id
|
||||||
|
entries defined in the driver. This makes the driver_data field mandatory
|
||||||
|
if all the pci_device_id entries have a non-zero driver_data value.
|
||||||
|
|
||||||
Once added, the driver probe routine will be invoked for any unclaimed
|
Once added, the driver probe routine will be invoked for any unclaimed
|
||||||
PCI devices listed in its (newly updated) pci_ids list.
|
PCI devices listed in its (newly updated) pci_ids list.
|
||||||
|
|
||||||
|
|
|
@ -203,22 +203,17 @@ to mmio_enabled.
|
||||||
|
|
||||||
3.3 helper functions
|
3.3 helper functions
|
||||||
|
|
||||||
3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
|
3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
|
||||||
pci_find_aer_capability locates the PCI Express AER capability
|
|
||||||
in the device configuration space. If the device doesn't support
|
|
||||||
PCI-Express AER, the function returns 0.
|
|
||||||
|
|
||||||
3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
|
|
||||||
pci_enable_pcie_error_reporting enables the device to send error
|
pci_enable_pcie_error_reporting enables the device to send error
|
||||||
messages to root port when an error is detected. Note that devices
|
messages to root port when an error is detected. Note that devices
|
||||||
don't enable the error reporting by default, so device drivers need
|
don't enable the error reporting by default, so device drivers need
|
||||||
call this function to enable it.
|
call this function to enable it.
|
||||||
|
|
||||||
3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
|
3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
|
||||||
pci_disable_pcie_error_reporting disables the device to send error
|
pci_disable_pcie_error_reporting disables the device to send error
|
||||||
messages to root port when an error is detected.
|
messages to root port when an error is detected.
|
||||||
|
|
||||||
3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
|
3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
|
||||||
pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
|
pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
|
||||||
error status register.
|
error status register.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,99 @@
|
||||||
|
The cgroup freezer is useful to batch job management systems which start
|
||||||
|
and stop sets of tasks in order to schedule the resources of a machine
|
||||||
|
according to the desires of a system administrator. This sort of program
|
||||||
|
is often used on HPC clusters to schedule access to the cluster as a
|
||||||
|
whole. The cgroup freezer uses cgroups to describe the set of tasks to
|
||||||
|
be started/stopped by the batch job management system. It also provides
|
||||||
|
a means to start and stop the tasks composing the job.
|
||||||
|
|
||||||
|
The cgroup freezer will also be useful for checkpointing running groups
|
||||||
|
of tasks. The freezer allows the checkpoint code to obtain a consistent
|
||||||
|
image of the tasks by attempting to force the tasks in a cgroup into a
|
||||||
|
quiescent state. Once the tasks are quiescent another task can
|
||||||
|
walk /proc or invoke a kernel interface to gather information about the
|
||||||
|
quiesced tasks. Checkpointed tasks can be restarted later should a
|
||||||
|
recoverable error occur. This also allows the checkpointed tasks to be
|
||||||
|
migrated between nodes in a cluster by copying the gathered information
|
||||||
|
to another node and restarting the tasks there.
|
||||||
|
|
||||||
|
Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
|
||||||
|
and resuming tasks in userspace. Both of these signals are observable
|
||||||
|
from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
|
||||||
|
blocked, or ignored it can be seen by waiting or ptracing parent tasks.
|
||||||
|
SIGCONT is especially unsuitable since it can be caught by the task. Any
|
||||||
|
programs designed to watch for SIGSTOP and SIGCONT could be broken by
|
||||||
|
attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
|
||||||
|
demonstrate this problem using nested bash shells:
|
||||||
|
|
||||||
|
$ echo $$
|
||||||
|
16644
|
||||||
|
$ bash
|
||||||
|
$ echo $$
|
||||||
|
16690
|
||||||
|
|
||||||
|
From a second, unrelated bash shell:
|
||||||
|
$ kill -SIGSTOP 16690
|
||||||
|
$ kill -SIGCONT 16690
|
||||||
|
|
||||||
|
<at this point 16690 exits and causes 16644 to exit too>
|
||||||
|
|
||||||
|
This happens because bash can observe both signals and choose how it
|
||||||
|
responds to them.
|
||||||
|
|
||||||
|
Another example of a program which catches and responds to these
|
||||||
|
signals is gdb. In fact any program designed to use ptrace is likely to
|
||||||
|
have a problem with this method of stopping and resuming tasks.
|
||||||
|
|
||||||
|
In contrast, the cgroup freezer uses the kernel freezer code to
|
||||||
|
prevent the freeze/unfreeze cycle from becoming visible to the tasks
|
||||||
|
being frozen. This allows the bash example above and gdb to run as
|
||||||
|
expected.
|
||||||
|
|
||||||
|
The freezer subsystem in the container filesystem defines a file named
|
||||||
|
freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
|
||||||
|
cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
|
||||||
|
Reading will return the current state.
|
||||||
|
|
||||||
|
* Examples of usage :
|
||||||
|
|
||||||
|
# mkdir /containers/freezer
|
||||||
|
# mount -t cgroup -ofreezer freezer /containers
|
||||||
|
# mkdir /containers/0
|
||||||
|
# echo $some_pid > /containers/0/tasks
|
||||||
|
|
||||||
|
to get status of the freezer subsystem :
|
||||||
|
|
||||||
|
# cat /containers/0/freezer.state
|
||||||
|
THAWED
|
||||||
|
|
||||||
|
to freeze all tasks in the container :
|
||||||
|
|
||||||
|
# echo FROZEN > /containers/0/freezer.state
|
||||||
|
# cat /containers/0/freezer.state
|
||||||
|
FREEZING
|
||||||
|
# cat /containers/0/freezer.state
|
||||||
|
FROZEN
|
||||||
|
|
||||||
|
to unfreeze all tasks in the container :
|
||||||
|
|
||||||
|
# echo THAWED > /containers/0/freezer.state
|
||||||
|
# cat /containers/0/freezer.state
|
||||||
|
THAWED
|
||||||
|
|
||||||
|
This is the basic mechanism which should do the right thing for user space tasks
|
||||||
|
in a simple scenario.
|
||||||
|
|
||||||
|
It's important to note that freezing can be incomplete. In that case we return
|
||||||
|
EBUSY. This means that some tasks in the cgroup are busy doing something that
|
||||||
|
prevents us from completely freezing the cgroup at this time. After EBUSY,
|
||||||
|
the cgroup will remain partially frozen -- reflected by freezer.state reporting
|
||||||
|
"FREEZING" when read. The state will remain "FREEZING" until one of these
|
||||||
|
things happens:
|
||||||
|
|
||||||
|
1) Userspace cancels the freezing operation by writing "THAWED" to
|
||||||
|
the freezer.state file
|
||||||
|
2) Userspace retries the freezing operation by writing "FROZEN" to
|
||||||
|
the freezer.state file (writing "FREEZING" is not legal
|
||||||
|
and returns EIO)
|
||||||
|
3) The tasks that blocked the cgroup from entering the "FROZEN"
|
||||||
|
state disappear from the cgroup's set of tasks.
|
|
@ -112,14 +112,22 @@ the per cgroup LRU.
|
||||||
|
|
||||||
2.2.1 Accounting details
|
2.2.1 Accounting details
|
||||||
|
|
||||||
All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
|
All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
|
||||||
RSS pages are accounted at the time of page_add_*_rmap() unless they've already
|
(Some pages which are never reclaimable and will not be on the global LRU
|
||||||
been accounted for earlier. A file page will be accounted for as Page Cache;
|
are not accounted. We just account pages under usual vm management.)
|
||||||
it's mapped into the page tables of a process, duplicate accounting is carefully
|
|
||||||
avoided. Page Cache pages are accounted at the time of add_to_page_cache().
|
RSS pages are accounted at page_fault unless they've already been accounted
|
||||||
The corresponding routines that remove a page from the page tables or removes
|
for earlier. A file page will be accounted for as Page Cache when it's
|
||||||
a page from Page Cache is used to decrement the accounting counters of the
|
inserted into inode (radix-tree). While it's mapped into the page tables of
|
||||||
cgroup.
|
processes, duplicate accounting is carefully avoided.
|
||||||
|
|
||||||
|
A RSS page is unaccounted when it's fully unmapped. A PageCache page is
|
||||||
|
unaccounted when it's removed from radix-tree.
|
||||||
|
|
||||||
|
At page migration, accounting information is kept.
|
||||||
|
|
||||||
|
Note: we just account pages-on-lru because our purpose is to control amount
|
||||||
|
of used pages. not-on-lru pages tend to be out-of-control from vm view.
|
||||||
|
|
||||||
2.3 Shared Page Accounting
|
2.3 Shared Page Accounting
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ hooks, beyond what is already present, required to manage dynamic
|
||||||
job placement on large systems.
|
job placement on large systems.
|
||||||
|
|
||||||
Cpusets use the generic cgroup subsystem described in
|
Cpusets use the generic cgroup subsystem described in
|
||||||
Documentation/cgroup.txt.
|
Documentation/cgroups/cgroups.txt.
|
||||||
|
|
||||||
Requests by a task, using the sched_setaffinity(2) system call to
|
Requests by a task, using the sched_setaffinity(2) system call to
|
||||||
include CPUs in its CPU affinity mask, and using the mbind(2) and
|
include CPUs in its CPU affinity mask, and using the mbind(2) and
|
||||||
|
|
|
@ -96,6 +96,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
|
||||||
errors=continue Keep going on a filesystem error.
|
errors=continue Keep going on a filesystem error.
|
||||||
errors=panic Panic and halt the machine if an error occurs.
|
errors=panic Panic and halt the machine if an error occurs.
|
||||||
|
|
||||||
|
data_err=ignore(*) Just print an error message if an error occurs
|
||||||
|
in a file data buffer in ordered mode.
|
||||||
|
data_err=abort Abort the journal if an error occurs in a file
|
||||||
|
data buffer in ordered mode.
|
||||||
|
|
||||||
grpid Give objects the same group ID as their creator.
|
grpid Give objects the same group ID as their creator.
|
||||||
bsdgroups
|
bsdgroups
|
||||||
|
|
||||||
|
|
|
@ -1384,15 +1384,18 @@ causes the kernel to prefer to reclaim dentries and inodes.
|
||||||
dirty_background_ratio
|
dirty_background_ratio
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
Contains, as a percentage of total system memory, the number of pages at which
|
Contains, as a percentage of the dirtyable system memory (free pages + mapped
|
||||||
the pdflush background writeback daemon will start writing out dirty data.
|
pages + file cache, not including locked pages and HugePages), the number of
|
||||||
|
pages at which the pdflush background writeback daemon will start writing out
|
||||||
|
dirty data.
|
||||||
|
|
||||||
dirty_ratio
|
dirty_ratio
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
Contains, as a percentage of total system memory, the number of pages at which
|
Contains, as a percentage of the dirtyable system memory (free pages + mapped
|
||||||
a process which is generating disk writes will itself start writing out dirty
|
pages + file cache, not including locked pages and HugePages), the number of
|
||||||
data.
|
pages at which a process which is generating disk writes will itself start
|
||||||
|
writing out dirty data.
|
||||||
|
|
||||||
dirty_writeback_centisecs
|
dirty_writeback_centisecs
|
||||||
-------------------------
|
-------------------------
|
||||||
|
@ -2412,24 +2415,29 @@ will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
|
||||||
of memory types. If a bit of the bitmask is set, memory segments of the
|
of memory types. If a bit of the bitmask is set, memory segments of the
|
||||||
corresponding memory type are dumped, otherwise they are not dumped.
|
corresponding memory type are dumped, otherwise they are not dumped.
|
||||||
|
|
||||||
The following 4 memory types are supported:
|
The following 7 memory types are supported:
|
||||||
- (bit 0) anonymous private memory
|
- (bit 0) anonymous private memory
|
||||||
- (bit 1) anonymous shared memory
|
- (bit 1) anonymous shared memory
|
||||||
- (bit 2) file-backed private memory
|
- (bit 2) file-backed private memory
|
||||||
- (bit 3) file-backed shared memory
|
- (bit 3) file-backed shared memory
|
||||||
- (bit 4) ELF header pages in file-backed private memory areas (it is
|
- (bit 4) ELF header pages in file-backed private memory areas (it is
|
||||||
effective only if the bit 2 is cleared)
|
effective only if the bit 2 is cleared)
|
||||||
|
- (bit 5) hugetlb private memory
|
||||||
|
- (bit 6) hugetlb shared memory
|
||||||
|
|
||||||
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
|
Note that MMIO pages such as frame buffer are never dumped and vDSO pages
|
||||||
are always dumped regardless of the bitmask status.
|
are always dumped regardless of the bitmask status.
|
||||||
|
|
||||||
Default value of coredump_filter is 0x3; this means all anonymous memory
|
Note that bits 0-4 don't affect any hugetlb memory. hugetlb memory is only
|
||||||
segments are dumped.
|
affected by bits 5-6.
|
||||||
|
|
||||||
|
Default value of coredump_filter is 0x23; this means all anonymous memory
|
||||||
|
segments and hugetlb private memory are dumped.
|
||||||
|
|
||||||
If you don't want to dump all shared memory segments attached to pid 1234,
|
If you don't want to dump all shared memory segments attached to pid 1234,
|
||||||
write 1 to the process's proc file.
|
write 0x21 to the process's proc file.
|
||||||
|
|
||||||
$ echo 0x1 > /proc/1234/coredump_filter
|
$ echo 0x21 > /proc/1234/coredump_filter
|
||||||
|
|
||||||
When a new process is created, the process inherits the bitmask status from its
|
When a new process is created, the process inherits the bitmask status from its
|
||||||
parent. It is useful to set up coredump_filter before the program runs.
|
parent. It is useful to set up coredump_filter before the program runs.
|
||||||
|
|
|
@ -86,6 +86,15 @@ norm_unmount (*) commit on unmount; the journal is committed
|
||||||
fast_unmount do not commit on unmount; this option makes
|
fast_unmount do not commit on unmount; this option makes
|
||||||
unmount faster, but the next mount slower
|
unmount faster, but the next mount slower
|
||||||
because of the need to replay the journal.
|
because of the need to replay the journal.
|
||||||
|
bulk_read read more in one go to take advantage of flash
|
||||||
|
media that read faster sequentially
|
||||||
|
no_bulk_read (*) do not bulk-read
|
||||||
|
no_chk_data_crc skip checking of CRCs on data nodes in order to
|
||||||
|
improve read performance. Use this option only
|
||||||
|
if the flash media is highly reliable. The effect
|
||||||
|
of this option is that corruption of the contents
|
||||||
|
of a file can go unnoticed.
|
||||||
|
chk_data_crc (*) do not skip checking CRCs on data nodes
|
||||||
|
|
||||||
|
|
||||||
Quick usage instructions
|
Quick usage instructions
|
||||||
|
|
|
@ -101,6 +101,7 @@ parameter is applicable:
|
||||||
X86-64 X86-64 architecture is enabled.
|
X86-64 X86-64 architecture is enabled.
|
||||||
More X86-64 boot options can be found in
|
More X86-64 boot options can be found in
|
||||||
Documentation/x86_64/boot-options.txt .
|
Documentation/x86_64/boot-options.txt .
|
||||||
|
X86 Either 32bit or 64bit x86 (same as X86-32+X86-64)
|
||||||
|
|
||||||
In addition, the following text indicates that the option:
|
In addition, the following text indicates that the option:
|
||||||
|
|
||||||
|
@ -690,7 +691,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
See Documentation/block/as-iosched.txt and
|
See Documentation/block/as-iosched.txt and
|
||||||
Documentation/block/deadline-iosched.txt for details.
|
Documentation/block/deadline-iosched.txt for details.
|
||||||
|
|
||||||
elfcorehdr= [X86-32, X86_64]
|
elfcorehdr= [IA64,PPC,SH,X86-32,X86_64]
|
||||||
Specifies physical address of start of kernel core
|
Specifies physical address of start of kernel core
|
||||||
image elf header. Generally kexec loader will
|
image elf header. Generally kexec loader will
|
||||||
pass this option to capture kernel.
|
pass this option to capture kernel.
|
||||||
|
@ -796,6 +797,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Defaults to the default architecture's huge page size
|
Defaults to the default architecture's huge page size
|
||||||
if not specified.
|
if not specified.
|
||||||
|
|
||||||
|
hlt [BUGS=ARM,SH]
|
||||||
|
|
||||||
i8042.debug [HW] Toggle i8042 debug mode
|
i8042.debug [HW] Toggle i8042 debug mode
|
||||||
i8042.direct [HW] Put keyboard port into non-translated mode
|
i8042.direct [HW] Put keyboard port into non-translated mode
|
||||||
i8042.dumbkbd [HW] Pretend that controller can only read data from
|
i8042.dumbkbd [HW] Pretend that controller can only read data from
|
||||||
|
@ -1211,6 +1214,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
||||||
memory.
|
memory.
|
||||||
|
|
||||||
|
memchunk=nn[KMG]
|
||||||
|
[KNL,SH] Allow user to override the default size for
|
||||||
|
per-device physically contiguous DMA buffers.
|
||||||
|
|
||||||
memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact
|
memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact
|
||||||
E820 memory map, as specified by the user.
|
E820 memory map, as specified by the user.
|
||||||
Such memmap=exactmap lines can be constructed based on
|
Such memmap=exactmap lines can be constructed based on
|
||||||
|
@ -1393,6 +1400,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
|
nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
|
||||||
|
|
||||||
|
nodsp [SH] Disable hardware DSP at boot time.
|
||||||
|
|
||||||
noefi [X86-32,X86-64] Disable EFI runtime services support.
|
noefi [X86-32,X86-64] Disable EFI runtime services support.
|
||||||
|
|
||||||
noexec [IA-64]
|
noexec [IA-64]
|
||||||
|
@ -1409,13 +1418,15 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
noexec32=off: disable non-executable mappings
|
noexec32=off: disable non-executable mappings
|
||||||
read implies executable mappings
|
read implies executable mappings
|
||||||
|
|
||||||
|
nofpu [SH] Disable hardware FPU at boot time.
|
||||||
|
|
||||||
nofxsr [BUGS=X86-32] Disables x86 floating point extended
|
nofxsr [BUGS=X86-32] Disables x86 floating point extended
|
||||||
register save and restore. The kernel will only save
|
register save and restore. The kernel will only save
|
||||||
legacy floating-point registers on task switch.
|
legacy floating-point registers on task switch.
|
||||||
|
|
||||||
noclflush [BUGS=X86] Don't use the CLFLUSH instruction
|
noclflush [BUGS=X86] Don't use the CLFLUSH instruction
|
||||||
|
|
||||||
nohlt [BUGS=ARM]
|
nohlt [BUGS=ARM,SH]
|
||||||
|
|
||||||
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
|
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
|
||||||
instruction doesn't work correctly and not to
|
instruction doesn't work correctly and not to
|
||||||
|
@ -1578,7 +1589,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
See also Documentation/paride.txt.
|
See also Documentation/paride.txt.
|
||||||
|
|
||||||
pci=option[,option...] [PCI] various PCI subsystem options:
|
pci=option[,option...] [PCI] various PCI subsystem options:
|
||||||
off [X86-32] don't probe for the PCI bus
|
off [X86] don't probe for the PCI bus
|
||||||
bios [X86-32] force use of PCI BIOS, don't access
|
bios [X86-32] force use of PCI BIOS, don't access
|
||||||
the hardware directly. Use this if your machine
|
the hardware directly. Use this if your machine
|
||||||
has a non-standard PCI host bridge.
|
has a non-standard PCI host bridge.
|
||||||
|
@ -1586,9 +1597,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
hardware access methods are allowed. Use this
|
hardware access methods are allowed. Use this
|
||||||
if you experience crashes upon bootup and you
|
if you experience crashes upon bootup and you
|
||||||
suspect they are caused by the BIOS.
|
suspect they are caused by the BIOS.
|
||||||
conf1 [X86-32] Force use of PCI Configuration
|
conf1 [X86] Force use of PCI Configuration
|
||||||
Mechanism 1.
|
Mechanism 1.
|
||||||
conf2 [X86-32] Force use of PCI Configuration
|
conf2 [X86] Force use of PCI Configuration
|
||||||
Mechanism 2.
|
Mechanism 2.
|
||||||
noaer [PCIE] If the PCIEAER kernel config parameter is
|
noaer [PCIE] If the PCIEAER kernel config parameter is
|
||||||
enabled, this kernel boot option can be used to
|
enabled, this kernel boot option can be used to
|
||||||
|
@ -1608,37 +1619,37 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
this option if the kernel is unable to allocate
|
this option if the kernel is unable to allocate
|
||||||
IRQs or discover secondary PCI buses on your
|
IRQs or discover secondary PCI buses on your
|
||||||
motherboard.
|
motherboard.
|
||||||
rom [X86-32] Assign address space to expansion ROMs.
|
rom [X86] Assign address space to expansion ROMs.
|
||||||
Use with caution as certain devices share
|
Use with caution as certain devices share
|
||||||
address decoders between ROMs and other
|
address decoders between ROMs and other
|
||||||
resources.
|
resources.
|
||||||
norom [X86-32,X86_64] Do not assign address space to
|
norom [X86] Do not assign address space to
|
||||||
expansion ROMs that do not already have
|
expansion ROMs that do not already have
|
||||||
BIOS assigned address ranges.
|
BIOS assigned address ranges.
|
||||||
irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be
|
irqmask=0xMMMM [X86] Set a bit mask of IRQs allowed to be
|
||||||
assigned automatically to PCI devices. You can
|
assigned automatically to PCI devices. You can
|
||||||
make the kernel exclude IRQs of your ISA cards
|
make the kernel exclude IRQs of your ISA cards
|
||||||
this way.
|
this way.
|
||||||
pirqaddr=0xAAAAA [X86-32] Specify the physical address
|
pirqaddr=0xAAAAA [X86] Specify the physical address
|
||||||
of the PIRQ table (normally generated
|
of the PIRQ table (normally generated
|
||||||
by the BIOS) if it is outside the
|
by the BIOS) if it is outside the
|
||||||
F0000h-100000h range.
|
F0000h-100000h range.
|
||||||
lastbus=N [X86-32] Scan all buses thru bus #N. Can be
|
lastbus=N [X86] Scan all buses thru bus #N. Can be
|
||||||
useful if the kernel is unable to find your
|
useful if the kernel is unable to find your
|
||||||
secondary buses and you want to tell it
|
secondary buses and you want to tell it
|
||||||
explicitly which ones they are.
|
explicitly which ones they are.
|
||||||
assign-busses [X86-32] Always assign all PCI bus
|
assign-busses [X86] Always assign all PCI bus
|
||||||
numbers ourselves, overriding
|
numbers ourselves, overriding
|
||||||
whatever the firmware may have done.
|
whatever the firmware may have done.
|
||||||
usepirqmask [X86-32] Honor the possible IRQ mask stored
|
usepirqmask [X86] Honor the possible IRQ mask stored
|
||||||
in the BIOS $PIR table. This is needed on
|
in the BIOS $PIR table. This is needed on
|
||||||
some systems with broken BIOSes, notably
|
some systems with broken BIOSes, notably
|
||||||
some HP Pavilion N5400 and Omnibook XE3
|
some HP Pavilion N5400 and Omnibook XE3
|
||||||
notebooks. This will have no effect if ACPI
|
notebooks. This will have no effect if ACPI
|
||||||
IRQ routing is enabled.
|
IRQ routing is enabled.
|
||||||
noacpi [X86-32] Do not use ACPI for IRQ routing
|
noacpi [X86] Do not use ACPI for IRQ routing
|
||||||
or for PCI scanning.
|
or for PCI scanning.
|
||||||
use_crs [X86-32] Use _CRS for PCI resource
|
use_crs [X86] Use _CRS for PCI resource
|
||||||
allocation.
|
allocation.
|
||||||
routeirq Do IRQ routing for all PCI devices.
|
routeirq Do IRQ routing for all PCI devices.
|
||||||
This is normally done in pci_enable_device(),
|
This is normally done in pci_enable_device(),
|
||||||
|
@ -1667,6 +1678,12 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
reserved for the CardBus bridge's memory
|
reserved for the CardBus bridge's memory
|
||||||
window. The default value is 64 megabytes.
|
window. The default value is 64 megabytes.
|
||||||
|
|
||||||
|
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
|
||||||
|
Management.
|
||||||
|
off Disable ASPM.
|
||||||
|
force Enable ASPM even on devices that claim not to support it.
|
||||||
|
WARNING: Forcing ASPM on may cause system lockups.
|
||||||
|
|
||||||
pcmv= [HW,PCMCIA] BadgePAD 4
|
pcmv= [HW,PCMCIA] BadgePAD 4
|
||||||
|
|
||||||
pd. [PARIDE]
|
pd. [PARIDE]
|
||||||
|
|
|
@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
|
||||||
to call) for the specific marker through marker_probe_register() and can be
|
to call) for the specific marker through marker_probe_register() and can be
|
||||||
activated by calling marker_arm(). Marker deactivation can be done by calling
|
activated by calling marker_arm(). Marker deactivation can be done by calling
|
||||||
marker_disarm() as many times as marker_arm() has been called. Removing a probe
|
marker_disarm() as many times as marker_arm() has been called. Removing a probe
|
||||||
is done through marker_probe_unregister(); it will disarm the probe and make
|
is done through marker_probe_unregister(); it will disarm the probe.
|
||||||
sure there is no caller left using the probe when it returns. Probe removal is
|
marker_synchronize_unregister() must be called before the end of the module exit
|
||||||
preempt-safe because preemption is disabled around the probe call. See the
|
function to make sure there is no caller left using the probe. This, and the
|
||||||
"Probe example" section below for a sample probe module.
|
fact that preemption is disabled around the probe call, make sure that probe
|
||||||
|
removal and module unload are safe. See the "Probe example" section below for a
|
||||||
|
sample probe module.
|
||||||
|
|
||||||
The marker mechanism supports inserting multiple instances of the same marker.
|
The marker mechanism supports inserting multiple instances of the same marker.
|
||||||
Markers can be put in inline functions, inlined static functions, and
|
Markers can be put in inline functions, inlined static functions, and
|
||||||
|
|
|
@ -0,0 +1,714 @@
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
|
||||||
|
I felt there was room for optimisation. I bashed the code for a few hours
|
||||||
|
performing tricks like table lookup removing superfluous code etc.
|
||||||
|
After that the speed was increased by 35-40%.
|
||||||
|
Still I was not too happy as I felt there was additional room for improvement.
|
||||||
|
|
||||||
|
Bad! I was hooked.
|
||||||
|
I decided to annotate my steps in this file. Perhaps it is useful to someone
|
||||||
|
or someone learns something from it.
|
||||||
|
|
||||||
|
|
||||||
|
The problem
|
||||||
|
===========
|
||||||
|
|
||||||
|
NAND flash (at least SLC one) typically has sectors of 256 bytes.
|
||||||
|
However NAND flash is not extremely reliable so some error detection
|
||||||
|
(and sometimes correction) is needed.
|
||||||
|
|
||||||
|
This is done by means of a Hamming code. I'll try to explain it in
|
||||||
|
layman's terms (and apologies to all the pros in the field in case I do
|
||||||
|
not use the right terminology, my coding theory class was almost 30
|
||||||
|
years ago, and I must admit it was not one of my favourites).
|
||||||
|
|
||||||
|
As I said before the ecc calculation is performed on sectors of 256
|
||||||
|
bytes. This is done by calculating several parity bits over the rows and
|
||||||
|
columns. The parity used is even parity which means that the parity bit = 1
|
||||||
|
if the data over which the parity is calculated is 1 and the parity bit = 0
|
||||||
|
if the data over which the parity is calculated is 0. So the total
|
||||||
|
number of bits over the data over which the parity is calculated + the
|
||||||
|
parity bit is even. (see wikipedia if you can't follow this).
|
||||||
|
Parity is often calculated by means of an exclusive or operation,
|
||||||
|
sometimes also referred to as xor. In C the operator for xor is ^
|
||||||
|
|
||||||
|
Back to ecc.
|
||||||
|
Let's give a small figure:
|
||||||
|
|
||||||
|
byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14
|
||||||
|
byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14
|
||||||
|
byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14
|
||||||
|
byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14
|
||||||
|
byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14
|
||||||
|
....
|
||||||
|
byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15
|
||||||
|
byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
|
||||||
|
cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0
|
||||||
|
cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2
|
||||||
|
cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
|
||||||
|
|
||||||
|
This figure represents a sector of 256 bytes.
|
||||||
|
cp is my abbreviation for column parity, rp for row parity.
|
||||||
|
|
||||||
|
Let's start to explain column parity.
|
||||||
|
cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
|
||||||
|
so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
|
||||||
|
Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
|
||||||
|
cp2 is the parity over bit0, bit1, bit4 and bit5
|
||||||
|
cp3 is the parity over bit2, bit3, bit6 and bit7.
|
||||||
|
cp4 is the parity over bit0, bit1, bit2 and bit3.
|
||||||
|
cp5 is the parity over bit4, bit5, bit6 and bit7.
|
||||||
|
Note that each of cp0 .. cp5 is exactly one bit.
|
||||||
|
|
||||||
|
Row parity actually works almost the same.
|
||||||
|
rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
|
||||||
|
rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
|
||||||
|
rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
|
||||||
|
(so handle two bytes, then skip 2 bytes).
|
||||||
|
rp3 covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
|
||||||
|
for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
|
||||||
|
so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
|
||||||
|
and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
|
||||||
|
The story now becomes quite boring. I guess you get the idea.
|
||||||
|
rp6 covers 8 bytes then skips 8 etc
|
||||||
|
rp7 skips 8 bytes then covers 8 etc
|
||||||
|
rp8 covers 16 bytes then skips 16 etc
|
||||||
|
rp9 skips 16 bytes then covers 16 etc
|
||||||
|
rp10 covers 32 bytes then skips 32 etc
|
||||||
|
rp11 skips 32 bytes then covers 32 etc
|
||||||
|
rp12 covers 64 bytes then skips 64 etc
|
||||||
|
rp13 skips 64 bytes then covers 64 etc
|
||||||
|
rp14 covers 128 bytes then skips 128
|
||||||
|
rp15 skips 128 bytes then covers 128
|
||||||
|
|
||||||
|
In the end the parity bits are grouped together in three bytes as
|
||||||
|
follows:
|
||||||
|
ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
|
||||||
|
ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00
|
||||||
|
ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08
|
||||||
|
ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1
|
||||||
|
|
||||||
|
I detected after writing this that ST application note AN1823
|
||||||
|
(http://www.st.com/stonline/books/pdf/docs/10123.pdf) gives a much
|
||||||
|
nicer picture (but they use line parity as the term where I use row parity).
|
||||||
|
Oh well, I'm graphically challenged, so suffer with me for a moment :-)
|
||||||
|
And I could not reuse the ST picture anyway for copyright reasons.
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 0
|
||||||
|
=========
|
||||||
|
|
||||||
|
Implementing the parity calculation is pretty simple.
|
||||||
|
In C pseudocode:
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
if (i & 0x01)
|
||||||
|
rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
|
||||||
|
else
|
||||||
|
rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
|
||||||
|
if (i & 0x02)
|
||||||
|
rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
|
||||||
|
else
|
||||||
|
rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
|
||||||
|
if (i & 0x04)
|
||||||
|
rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
|
||||||
|
else
|
||||||
|
rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
|
||||||
|
if (i & 0x08)
|
||||||
|
rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
|
||||||
|
else
|
||||||
|
rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
|
||||||
|
if (i & 0x10)
|
||||||
|
rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
|
||||||
|
else
|
||||||
|
rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
|
||||||
|
if (i & 0x20)
|
||||||
|
rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
|
||||||
|
else
|
||||||
|
rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
|
||||||
|
if (i & 0x40)
|
||||||
|
rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
|
||||||
|
else
|
||||||
|
rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
|
||||||
|
if (i & 0x80)
|
||||||
|
rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
|
||||||
|
else
|
||||||
|
rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
|
||||||
|
cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
|
||||||
|
cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
|
||||||
|
cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
|
||||||
|
cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3;
|
||||||
|
cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4;
|
||||||
|
cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 0
|
||||||
|
==========
|
||||||
|
|
||||||
|
C does have bitwise operators but not really operators to do the above
|
||||||
|
efficiently (and most hardware has no such instructions either).
|
||||||
|
Therefore without implementing this it was clear that the code above was
|
||||||
|
not going to bring me a Nobel prize :-)
|
||||||
|
|
||||||
|
Fortunately the exclusive or operation is commutative, so we can combine
|
||||||
|
the values in any order. So instead of calculating all the bits
|
||||||
|
individually, let us try to rearrange things.
|
||||||
|
For the column parity this is easy. We can just xor the bytes and in the
|
||||||
|
end filter out the relevant bits. This is pretty nice as it will bring
|
||||||
|
all cp calculation out of the if loop.
|
||||||
|
|
||||||
|
Similarly we can first xor the bytes for the various rows.
|
||||||
|
This leads to:
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 1
|
||||||
|
=========
|
||||||
|
|
||||||
|
const char parity[256] = {
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
|
||||||
|
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
|
||||||
|
};
|
||||||
|
|
||||||
|
void ecc1(const unsigned char *buf, unsigned char *code)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
const unsigned char *bp = buf;
|
||||||
|
unsigned char cur;
|
||||||
|
unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
|
||||||
|
unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
|
||||||
|
unsigned char par;
|
||||||
|
|
||||||
|
par = 0;
|
||||||
|
rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
|
||||||
|
rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
|
||||||
|
rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
|
||||||
|
rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i++)
|
||||||
|
{
|
||||||
|
cur = *bp++;
|
||||||
|
par ^= cur;
|
||||||
|
if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
|
||||||
|
if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
|
||||||
|
if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
|
||||||
|
if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
|
||||||
|
if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
|
||||||
|
if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
|
||||||
|
if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
|
||||||
|
if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
|
||||||
|
}
|
||||||
|
code[0] =
|
||||||
|
(parity[rp7] << 7) |
|
||||||
|
(parity[rp6] << 6) |
|
||||||
|
(parity[rp5] << 5) |
|
||||||
|
(parity[rp4] << 4) |
|
||||||
|
(parity[rp3] << 3) |
|
||||||
|
(parity[rp2] << 2) |
|
||||||
|
(parity[rp1] << 1) |
|
||||||
|
(parity[rp0]);
|
||||||
|
code[1] =
|
||||||
|
(parity[rp15] << 7) |
|
||||||
|
(parity[rp14] << 6) |
|
||||||
|
(parity[rp13] << 5) |
|
||||||
|
(parity[rp12] << 4) |
|
||||||
|
(parity[rp11] << 3) |
|
||||||
|
(parity[rp10] << 2) |
|
||||||
|
(parity[rp9] << 1) |
|
||||||
|
(parity[rp8]);
|
||||||
|
code[2] =
|
||||||
|
(parity[par & 0xf0] << 7) |
|
||||||
|
(parity[par & 0x0f] << 6) |
|
||||||
|
(parity[par & 0xcc] << 5) |
|
||||||
|
(parity[par & 0x33] << 4) |
|
||||||
|
(parity[par & 0xaa] << 3) |
|
||||||
|
(parity[par & 0x55] << 2);
|
||||||
|
code[0] = ~code[0];
|
||||||
|
code[1] = ~code[1];
|
||||||
|
code[2] = ~code[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
Still pretty straightforward. The last three invert statements are there to
|
||||||
|
give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
|
||||||
|
all data is 0xff, so the checksum then matches.
|
||||||
|
|
||||||
|
I also introduced the parity lookup. I expected this to be the fastest
|
||||||
|
way to calculate the parity, but I will investigate alternatives later
|
||||||
|
on.
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 1
|
||||||
|
==========
|
||||||
|
|
||||||
|
The code works, but is not terribly efficient. On my system it took
|
||||||
|
almost 4 times as much time as the linux driver code. But hey, if it was
|
||||||
|
*that* easy this would have been done long before.
|
||||||
|
No pain, no gain.
|
||||||
|
|
||||||
|
Fortunately there is plenty of room for improvement.
|
||||||
|
|
||||||
|
In step 1 we moved from bit-wise calculation to byte-wise calculation.
|
||||||
|
However in C we can also use the unsigned long data type and virtually
|
||||||
|
every modern microprocessor supports 32 bit operations, so why not try
|
||||||
|
to write our code in such a way that we process data in 32 bit chunks.
|
||||||
|
|
||||||
|
Of course this means some modification as the row parity is byte by
|
||||||
|
byte. A quick analysis:
|
||||||
|
for the column parity we use the par variable. When extending to 32 bits
|
||||||
|
we can in the end easily calculate rp0 and rp1 from it.
|
||||||
|
(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
|
||||||
|
respectively)
|
||||||
|
also rp2 and rp3 can be easily retrieved from par as rp3 covers the
|
||||||
|
first two bytes and rp2 the last two bytes.
|
||||||
|
|
||||||
|
Note that of course now the loop is executed only 64 times (256/4).
|
||||||
|
And note that care must be taken wrt byte ordering. The way bytes are
|
||||||
|
ordered in a long is machine dependent, and might affect us.
|
||||||
|
Anyway, if there is an issue: this code is developed on x86 (to be
|
||||||
|
precise: a DELL PC with a D920 Intel CPU)
|
||||||
|
|
||||||
|
And of course the performance might depend on alignment, but I expect
|
||||||
|
that the I/O buffers in the nand driver are aligned properly (and
|
||||||
|
otherwise that should be fixed to get maximum performance).
|
||||||
|
|
||||||
|
Let's give it a try...
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 2
|
||||||
|
=========
|
||||||
|
|
||||||
|
extern const char parity[256];
|
||||||
|
|
||||||
|
void ecc2(const unsigned char *buf, unsigned char *code)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
const unsigned long *bp = (unsigned long *)buf;
|
||||||
|
unsigned long cur;
|
||||||
|
unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
|
||||||
|
unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
|
||||||
|
unsigned long par;
|
||||||
|
|
||||||
|
par = 0;
|
||||||
|
rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
|
||||||
|
rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
|
||||||
|
rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
|
||||||
|
rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < 64; i++)
|
||||||
|
{
|
||||||
|
cur = *bp++;
|
||||||
|
par ^= cur;
|
||||||
|
if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
|
||||||
|
if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
|
||||||
|
if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
|
||||||
|
if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
|
||||||
|
if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
|
||||||
|
if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
we need to adapt the code generation for the fact that rp vars are now
|
||||||
|
long; also the column parity calculation needs to be changed.
|
||||||
|
we'll bring rp4 to 15 back to single byte entities by shifting and
|
||||||
|
xoring
|
||||||
|
*/
|
||||||
|
rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
|
||||||
|
rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
|
||||||
|
rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
|
||||||
|
rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
|
||||||
|
rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
|
||||||
|
rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
|
||||||
|
rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
|
||||||
|
rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
|
||||||
|
rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
|
||||||
|
rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
|
||||||
|
rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
|
||||||
|
rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
|
||||||
|
rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
|
||||||
|
rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
|
||||||
|
par ^= (par >> 16);
|
||||||
|
rp1 = (par >> 8); rp1 &= 0xff;
|
||||||
|
rp0 = (par & 0xff);
|
||||||
|
par ^= (par >> 8); par &= 0xff;
|
||||||
|
|
||||||
|
code[0] =
|
||||||
|
(parity[rp7] << 7) |
|
||||||
|
(parity[rp6] << 6) |
|
||||||
|
(parity[rp5] << 5) |
|
||||||
|
(parity[rp4] << 4) |
|
||||||
|
(parity[rp3] << 3) |
|
||||||
|
(parity[rp2] << 2) |
|
||||||
|
(parity[rp1] << 1) |
|
||||||
|
(parity[rp0]);
|
||||||
|
code[1] =
|
||||||
|
(parity[rp15] << 7) |
|
||||||
|
(parity[rp14] << 6) |
|
||||||
|
(parity[rp13] << 5) |
|
||||||
|
(parity[rp12] << 4) |
|
||||||
|
(parity[rp11] << 3) |
|
||||||
|
(parity[rp10] << 2) |
|
||||||
|
(parity[rp9] << 1) |
|
||||||
|
(parity[rp8]);
|
||||||
|
code[2] =
|
||||||
|
(parity[par & 0xf0] << 7) |
|
||||||
|
(parity[par & 0x0f] << 6) |
|
||||||
|
(parity[par & 0xcc] << 5) |
|
||||||
|
(parity[par & 0x33] << 4) |
|
||||||
|
(parity[par & 0xaa] << 3) |
|
||||||
|
(parity[par & 0x55] << 2);
|
||||||
|
code[0] = ~code[0];
|
||||||
|
code[1] = ~code[1];
|
||||||
|
code[2] = ~code[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
The parity array is not shown any more. Note also that for these
|
||||||
|
examples I kinda deviated from my regular programming style by allowing
|
||||||
|
multiple statements on a line, not using { } in then and else blocks
|
||||||
|
with only a single statement and by using operators like ^=
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 2
|
||||||
|
==========
|
||||||
|
|
||||||
|
The code (of course) works, and hurray: we are a little bit faster than
|
||||||
|
the linux driver code (about 15%). But wait, don't cheer too quickly.
|
||||||
|
There is more to be gained.
|
||||||
|
If we look at e.g. rp14 and rp15 we see that we either xor our data with
|
||||||
|
rp14 or with rp15. However we also have par which goes over all data.
|
||||||
|
This means there is no need to calculate rp14 as it can be calculated from
|
||||||
|
rp15 through rp14 = par ^ rp15;
|
||||||
|
(or if desired we can avoid calculating rp15 and calculate it from
|
||||||
|
rp14). That is why some places refer to inverse parity.
|
||||||
|
Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
|
||||||
|
Effectively this means we can eliminate the else clause from the if
|
||||||
|
statements. Also we can optimise the calculation in the end a little bit
|
||||||
|
by going from long to byte first. Actually we can even avoid the table
|
||||||
|
lookups
|
||||||
|
|
||||||
|
Attempt 3
|
||||||
|
=========
|
||||||
|
|
||||||
|
Odd replaced:
|
||||||
|
if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
|
||||||
|
if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
|
||||||
|
if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
|
||||||
|
if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
|
||||||
|
if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
|
||||||
|
if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
|
||||||
|
with
|
||||||
|
if (i & 0x01) rp5 ^= cur;
|
||||||
|
if (i & 0x02) rp7 ^= cur;
|
||||||
|
if (i & 0x04) rp9 ^= cur;
|
||||||
|
if (i & 0x08) rp11 ^= cur;
|
||||||
|
if (i & 0x10) rp13 ^= cur;
|
||||||
|
if (i & 0x20) rp15 ^= cur;
|
||||||
|
|
||||||
|
and outside the loop added:
|
||||||
|
rp4 = par ^ rp5;
|
||||||
|
rp6 = par ^ rp7;
|
||||||
|
rp8 = par ^ rp9;
|
||||||
|
rp10 = par ^ rp11;
|
||||||
|
rp12 = par ^ rp13;
|
||||||
|
rp14 = par ^ rp15;
|
||||||
|
|
||||||
|
And after that the code takes about 30% more time, although the number of
|
||||||
|
statements is reduced. This is also reflected in the assembly code.
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 3
|
||||||
|
==========
|
||||||
|
|
||||||
|
Very weird. Guess it has to do with caching or instruction parallelism
|
||||||
|
or so. I also tried on an eeePC (Celeron, clocked at 900 MHz). Interesting
|
||||||
|
observation was that this one is only 30% slower (according to time)
|
||||||
|
executing the code than my 3 GHz D920 processor.
|
||||||
|
|
||||||
|
Well, it was expected not to be easy so maybe instead move to a
|
||||||
|
different track: let's move back to the code from attempt2 and do some
|
||||||
|
loop unrolling. This will eliminate a few if statements. I'll try
|
||||||
|
different amounts of unrolling to see what works best.
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 4
|
||||||
|
=========
|
||||||
|
|
||||||
|
Unrolled the loop 1, 2, 3 and 4 times.
|
||||||
|
For 4 the code starts with:
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
cur = *bp++;
|
||||||
|
par ^= cur;
|
||||||
|
rp4 ^= cur;
|
||||||
|
rp6 ^= cur;
|
||||||
|
rp8 ^= cur;
|
||||||
|
rp10 ^= cur;
|
||||||
|
if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
|
||||||
|
if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
|
||||||
|
cur = *bp++;
|
||||||
|
par ^= cur;
|
||||||
|
rp5 ^= cur;
|
||||||
|
rp6 ^= cur;
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 4
|
||||||
|
==========
|
||||||
|
|
||||||
|
Unrolling once gains about 15%
|
||||||
|
Unrolling twice keeps the gain at about 15%
|
||||||
|
Unrolling three times gives a gain of 30% compared to attempt 2.
|
||||||
|
Unrolling four times gives a marginal improvement compared to unrolling
|
||||||
|
three times.
|
||||||
|
|
||||||
|
I decided to proceed with a four time unrolled loop anyway. It was my gut
|
||||||
|
feeling that in the next steps I would obtain additional gain from it.
|
||||||
|
|
||||||
|
The next step was triggered by the fact that par contains the xor of all
|
||||||
|
bytes and rp4 and rp5 each contain the xor of half of the bytes.
|
||||||
|
So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
|
||||||
|
that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
|
||||||
|
eliminate rp5 (or rp4, but I already foresaw another optimisation).
|
||||||
|
The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 5
|
||||||
|
=========
|
||||||
|
|
||||||
|
So effectively all odd-numbered rp assignments in the loop were removed.
|
||||||
|
This included the else clause of the if statements.
|
||||||
|
Of course after the loop we need to correct things by adding code like:
|
||||||
|
rp5 = par ^ rp4;
|
||||||
|
Also the initial assignments (rp5 = 0; etc) could be removed.
|
||||||
|
Along the line I also removed the initialisation of rp0/1/2/3.
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 5
|
||||||
|
==========
|
||||||
|
|
||||||
|
Measurements showed this was a good move. The run-time roughly halved
|
||||||
|
compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
|
||||||
|
of the processor time compared to the current code in the linux kernel.
|
||||||
|
|
||||||
|
However, still I thought there was more. I didn't like all the if
|
||||||
|
statements. Why not keep a running parity and only keep the last if
|
||||||
|
statement. Time for yet another version!
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 6
|
||||||
|
=========
|
||||||
|
|
||||||
|
The code within the for loop was changed to:
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
cur = *bp++; tmppar = cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
|
||||||
|
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
|
||||||
|
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp8 ^= cur;
|
||||||
|
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur;
|
||||||
|
|
||||||
|
par ^= tmppar;
|
||||||
|
if ((i & 0x1) == 0) rp12 ^= tmppar;
|
||||||
|
if ((i & 0x2) == 0) rp14 ^= tmppar;
|
||||||
|
}
|
||||||
|
|
||||||
|
As you can see tmppar is used to accumulate the parity within a for
|
||||||
|
iteration. In the last 3 statements it is added to par and, if needed,
|
||||||
|
to rp12 and rp14.
|
||||||
|
|
||||||
|
While making the changes I also found that I could exploit that tmppar
|
||||||
|
contains the running parity for this iteration. So instead of having:
|
||||||
|
rp4 ^= cur; rp6 ^= cur;
|
||||||
|
I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
|
||||||
|
statement. A similar change was done for rp8 and rp10
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 6
|
||||||
|
==========
|
||||||
|
|
||||||
|
Measuring this code again showed big gain. When executing the original
|
||||||
|
linux code 1 million times, this took about 1 second on my system.
|
||||||
|
(using time to measure the performance). After this iteration I was back
|
||||||
|
to 0.075 sec. Actually I had to decide to start measuring over 10
|
||||||
|
million iterations in order not to lose too much accuracy. This one
|
||||||
|
definitely seemed to be the jackpot!
|
||||||
|
|
||||||
|
There is a little bit more room for improvement though. There are three
|
||||||
|
places with statements:
|
||||||
|
rp4 ^= cur; rp6 ^= cur;
|
||||||
|
It seems more efficient to also maintain a variable rp4_6 in the for
|
||||||
|
loop; This eliminates 3 statements per loop. Of course after the loop we
|
||||||
|
need to correct by adding:
|
||||||
|
rp4 ^= rp4_6;
|
||||||
|
rp6 ^= rp4_6;
|
||||||
|
Furthermore there are 4 sequential assignments to rp8. This can be
|
||||||
|
encoded slightly more efficiently by saving tmppar before those 4 lines
|
||||||
|
and later do rp8 = rp8 ^ tmppar ^ notrp8;
|
||||||
|
(where notrp8 is the value of rp8 before those 4 lines).
|
||||||
|
Again a use of the commutative property of xor.
|
||||||
|
Time for a new test!
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 7
|
||||||
|
=========
|
||||||
|
|
||||||
|
The new code now looks like:
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
cur = *bp++; tmppar = cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
|
||||||
|
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
|
||||||
|
|
||||||
|
notrp8 = tmppar;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur;
|
||||||
|
rp8 = rp8 ^ tmppar ^ notrp8;
|
||||||
|
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
|
||||||
|
cur = *bp++; tmppar ^= cur;
|
||||||
|
|
||||||
|
par ^= tmppar;
|
||||||
|
if ((i & 0x1) == 0) rp12 ^= tmppar;
|
||||||
|
if ((i & 0x2) == 0) rp14 ^= tmppar;
|
||||||
|
}
|
||||||
|
rp4 ^= rp4_6;
|
||||||
|
rp6 ^= rp4_6;
|
||||||
|
|
||||||
|
|
||||||
|
Not a big change, but every penny counts :-)
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 7
|
||||||
|
==========
|
||||||
|
|
||||||
|
Actually this made things worse. Not very much, but I don't want to move
|
||||||
|
into the wrong direction. Maybe something to investigate later. Could
|
||||||
|
have to do with caching again.
|
||||||
|
|
||||||
|
Guess that is what there is to win within the loop. Maybe unrolling one
|
||||||
|
more time will help. I'll keep the optimisations from 7 for now.
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 8
|
||||||
|
=========
|
||||||
|
|
||||||
|
Unrolled the loop one more time.
|
||||||
|
|
||||||
|
|
||||||
|
Analysis 8
|
||||||
|
==========
|
||||||
|
|
||||||
|
This makes things worse. Let's stick with attempt 6 and continue from there.
|
||||||
|
Although it seems that the code within the loop cannot be optimised
|
||||||
|
further there is still room to optimize the generation of the ecc codes.
|
||||||
|
We can simply calculate the total parity. If this is 0 then rp4 = rp5
|
||||||
|
etc. If the parity is 1, then rp4 = !rp5;
|
||||||
|
But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
|
||||||
|
in the result byte and then do something like
|
||||||
|
code[0] |= (code[0] << 1);
|
||||||
|
Let's test this.
|
||||||
|
|
||||||
|
|
||||||
|
Attempt 9
|
||||||
|
=========
|
||||||
|
|
||||||
|
Changed the code but again this slightly degrades performance. Tried all
|
||||||
|
kind of other things, like having dedicated parity arrays to avoid the
|
||||||
|
shift after parity[rp7] << 7; No gain.
|
||||||
|
Change the lookup using the parity array by using shift operators (e.g.
|
||||||
|
replace parity[rp7] << 7 with:
|
||||||
|
rp7 ^= (rp7 << 4);
|
||||||
|
rp7 ^= (rp7 << 2);
|
||||||
|
rp7 ^= (rp7 << 1);
|
||||||
|
rp7 &= 0x80;
|
||||||
|
No gain.
|
||||||
|
|
||||||
|
The only marginal change was inverting the parity bits, so we can remove
|
||||||
|
the last three invert statements.
|
||||||
|
|
||||||
|
Ah well, pity this does not deliver more. Then again 10 million
|
||||||
|
iterations using the linux driver code takes between 13 and 13.5
|
||||||
|
seconds, whereas my code now takes about 0.73 seconds for those 10
|
||||||
|
million iterations. So basically I've improved the performance by a
|
||||||
|
factor 18 on my system. Not that bad. Of course on different hardware
|
||||||
|
you will get different results. No warranties!
|
||||||
|
|
||||||
|
But of course there is no such thing as a free lunch. The codesize almost
|
||||||
|
tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
|
||||||
|
|
||||||
|
|
||||||
|
Correcting errors
|
||||||
|
=================
|
||||||
|
|
||||||
|
For correcting errors I again used the ST application note as a starter,
|
||||||
|
but I also peeked at the existing code.
|
||||||
|
The algorithm itself is pretty straightforward. Just xor the given and
|
||||||
|
the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
|
||||||
|
are 1 we have one correctable bit error. If there is 1 bit 1, we have an
|
||||||
|
error in the given ecc code.
|
||||||
|
It proved to be fastest to do some table lookups. Performance gain
|
||||||
|
introduced by this is about a factor 2 on my system when a repair had to
|
||||||
|
be done, and 1% or so if no repair had to be done.
|
||||||
|
Code size increased from 330 bytes to 686 bytes for this function.
|
||||||
|
(gcc 4.2, -O3)
|
||||||
|
|
||||||
|
|
||||||
|
Conclusion
|
||||||
|
==========
|
||||||
|
|
||||||
|
The gain when calculating the ecc is tremendous. On my development hardware
|
||||||
|
a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
|
||||||
|
embedded system with a MIPS core a factor 7 was obtained.
|
||||||
|
On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
|
||||||
|
5 (big endian mode, gcc 4.1.2, -O3)
|
||||||
|
For correction not much gain could be obtained (as bitflips are rare). Then
|
||||||
|
again there are also far fewer cycles spent there.
|
||||||
|
|
||||||
|
It seems there is not much more gain possible in this, at least when
|
||||||
|
programmed in C. Of course it might be possible to squeeze something more
|
||||||
|
out of it with an assembler program, but due to pipeline behaviour etc
|
||||||
|
this is very tricky (at least for intel hw).
|
||||||
|
|
||||||
|
Author: Frans Meulenbroeks
|
||||||
|
Copyright (C) 2008 Koninklijke Philips Electronics NV.
|
|
@ -95,7 +95,9 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
|
||||||
|
|
||||||
'p' - Will dump the current registers and flags to your console.
|
'p' - Will dump the current registers and flags to your console.
|
||||||
|
|
||||||
'q' - Will dump a list of all running timers.
|
'q' - Will dump per CPU lists of all armed hrtimers (but NOT regular
|
||||||
|
timer_list timers) and detailed information about all
|
||||||
|
clockevent devices.
|
||||||
|
|
||||||
'r' - Turns off keyboard raw mode and sets it to XLATE.
|
'r' - Turns off keyboard raw mode and sets it to XLATE.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
Using the Linux Kernel Tracepoints
|
||||||
|
|
||||||
|
Mathieu Desnoyers
|
||||||
|
|
||||||
|
|
||||||
|
This document introduces Linux Kernel Tracepoints and their use. It provides
|
||||||
|
examples of how to insert tracepoints in the kernel and connect probe functions
|
||||||
|
to them and provides some examples of probe functions.
|
||||||
|
|
||||||
|
|
||||||
|
* Purpose of tracepoints
|
||||||
|
|
||||||
|
A tracepoint placed in code provides a hook to call a function (probe) that you
|
||||||
|
can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
|
||||||
|
"off" (no probe is attached). When a tracepoint is "off" it has no effect,
|
||||||
|
except for adding a tiny time penalty (checking a condition for a branch) and
|
||||||
|
space penalty (adding a few bytes for the function call at the end of the
|
||||||
|
instrumented function and adding a data structure in a separate section). When a
|
||||||
|
tracepoint is "on", the function you provide is called each time the tracepoint
|
||||||
|
is executed, in the execution context of the caller. When the function provided
|
||||||
|
ends its execution, it returns to the caller (continuing from the tracepoint
|
||||||
|
site).
|
||||||
|
|
||||||
|
You can put tracepoints at important locations in the code. They are
|
||||||
|
lightweight hooks that can pass an arbitrary number of parameters,
|
||||||
|
whose prototypes are described in a tracepoint declaration placed in a header
|
||||||
|
file.
|
||||||
|
|
||||||
|
They can be used for tracing and performance accounting.
|
||||||
|
|
||||||
|
|
||||||
|
* Usage
|
||||||
|
|
||||||
|
Two elements are required for tracepoints :
|
||||||
|
|
||||||
|
- A tracepoint definition, placed in a header file.
|
||||||
|
- The tracepoint statement, in C code.
|
||||||
|
|
||||||
|
In order to use tracepoints, you should include linux/tracepoint.h.
|
||||||
|
|
||||||
|
In include/trace/subsys.h :
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
|
DEFINE_TRACE(subsys_eventname,
|
||||||
|
TPPROTO(int firstarg, struct task_struct *p),
|
||||||
|
TPARGS(firstarg, p));
|
||||||
|
|
||||||
|
In subsys/file.c (where the tracing statement must be added) :
|
||||||
|
|
||||||
|
#include <trace/subsys.h>
|
||||||
|
|
||||||
|
void somefct(void)
|
||||||
|
{
|
||||||
|
...
|
||||||
|
trace_subsys_eventname(arg, task);
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
Where :
|
||||||
|
- subsys_eventname is an identifier unique to your event
|
||||||
|
- subsys is the name of your subsystem.
|
||||||
|
- eventname is the name of the event to trace.
|
||||||
|
- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function
|
||||||
|
called by this tracepoint.
|
||||||
|
- TPARGS(firstarg, p) are the parameter names, same as found in the prototype.
|
||||||
|
|
||||||
|
Connecting a function (probe) to a tracepoint is done by providing a probe
|
||||||
|
(function to call) for the specific tracepoint through
|
||||||
|
register_trace_subsys_eventname(). Removing a probe is done through
|
||||||
|
unregister_trace_subsys_eventname(); it will remove the probe and make sure there is no
|
||||||
|
caller left using the probe when it returns. Probe removal is preempt-safe
|
||||||
|
because preemption is disabled around the probe call. See the "Probe example"
|
||||||
|
section below for a sample probe module.
|
||||||
|
|
||||||
|
The tracepoint mechanism supports inserting multiple instances of the same
|
||||||
|
tracepoint, but a single definition must be made of a given tracepoint name over
|
||||||
|
all the kernel to make sure no type conflict will occur. Name mangling of the
|
||||||
|
tracepoints is done using the prototypes to make sure typing is correct.
|
||||||
|
Verification of probe type correctness is done at the registration site by the
|
||||||
|
compiler. Tracepoints can be put in inline functions, inlined static functions,
|
||||||
|
and unrolled loops as well as regular functions.
|
||||||
|
|
||||||
|
The naming scheme "subsys_event" is suggested here as a convention intended
|
||||||
|
to limit collisions. Tracepoint names are global to the kernel: they are
|
||||||
|
considered as being the same whether they are in the core kernel image or in
|
||||||
|
modules.
|
||||||
|
|
||||||
|
|
||||||
|
* Probe / tracepoint example
|
||||||
|
|
||||||
|
See the example provided in samples/tracepoints/src
|
||||||
|
|
||||||
|
Compile them with your kernel.
|
||||||
|
|
||||||
|
Run, as root :
|
||||||
|
modprobe tracepoint-example (insmod order is not important)
|
||||||
|
modprobe tracepoint-probe-example
|
||||||
|
cat /proc/tracepoint-example (returns an expected error)
|
||||||
|
rmmod tracepoint-example tracepoint-probe-example
|
||||||
|
dmesg
|
|
@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
|
||||||
$ echo mmiotrace > /debug/tracing/current_tracer
|
$ echo mmiotrace > /debug/tracing/current_tracer
|
||||||
$ cat /debug/tracing/trace_pipe > mydump.txt &
|
$ cat /debug/tracing/trace_pipe > mydump.txt &
|
||||||
Start X or whatever.
|
Start X or whatever.
|
||||||
$ echo "X is up" > /debug/tracing/marker
|
$ echo "X is up" > /debug/tracing/trace_marker
|
||||||
$ echo none > /debug/tracing/current_tracer
|
$ echo none > /debug/tracing/current_tracer
|
||||||
Check for lost events.
|
Check for lost events.
|
||||||
|
|
||||||
|
@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
|
||||||
Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
|
Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
|
||||||
accesses to areas that are ioremapped while mmiotrace is active.
|
accesses to areas that are ioremapped while mmiotrace is active.
|
||||||
|
|
||||||
[Unimplemented feature:]
|
|
||||||
During tracing you can place comments (markers) into the trace by
|
During tracing you can place comments (markers) into the trace by
|
||||||
$ echo "X is up" > /debug/tracing/marker
|
$ echo "X is up" > /debug/tracing/trace_marker
|
||||||
This makes it easier to see which part of the (huge) trace corresponds to
|
This makes it easier to see which part of the (huge) trace corresponds to
|
||||||
which action. It is recommended to place descriptive markers about what you
|
which action. It is recommended to place descriptive markers about what you
|
||||||
do.
|
do.
|
||||||
|
|
|
@ -0,0 +1,615 @@
|
||||||
|
|
||||||
|
This document describes the Linux memory management "Unevictable LRU"
|
||||||
|
infrastructure and the use of this infrastructure to manage several types
|
||||||
|
of "unevictable" pages. The document attempts to provide the overall
|
||||||
|
rationale behind this mechanism and the rationale for some of the design
|
||||||
|
decisions that drove the implementation. The latter design rationale is
|
||||||
|
discussed in the context of an implementation description. Admittedly, one
|
||||||
|
can obtain the implementation details--the "what does it do?"--by reading the
|
||||||
|
code. One hopes that the descriptions below add value by providing the answer
|
||||||
|
to "why does it do that?".
|
||||||
|
|
||||||
|
Unevictable LRU Infrastructure:
|
||||||
|
|
||||||
|
The Unevictable LRU adds an additional LRU list to track unevictable pages
|
||||||
|
and to hide these pages from vmscan. This mechanism is based on a patch by
|
||||||
|
Larry Woodman of Red Hat to address several scalability problems with page
|
||||||
|
reclaim in Linux. The problems have been observed at customer sites on large
|
||||||
|
memory x86_64 systems. For example, a non-NUMA x86_64 platform with 128GB
|
||||||
|
of main memory will have over 32 million 4k pages in a single zone. When a
|
||||||
|
large fraction of these pages are not evictable for any reason [see below],
|
||||||
|
vmscan will spend a lot of time scanning the LRU lists looking for the small
|
||||||
|
fraction of pages that are evictable. This can result in a situation where
|
||||||
|
all cpus are spending 100% of their time in vmscan for hours or days on end,
|
||||||
|
with the system completely unresponsive.
|
||||||
|
|
||||||
|
The Unevictable LRU infrastructure addresses the following classes of
|
||||||
|
unevictable pages:
|
||||||
|
|
||||||
|
+ page owned by ramfs
|
||||||
|
+ page mapped into SHM_LOCKed shared memory regions
|
||||||
|
+ page mapped into VM_LOCKED [mlock()ed] vmas
|
||||||
|
|
||||||
|
The infrastructure might be able to handle other conditions that make pages
|
||||||
|
unevictable, either by definition or by circumstance, in the future.
|
||||||
|
|
||||||
|
|
||||||
|
The Unevictable LRU List
|
||||||
|
|
||||||
|
The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
|
||||||
|
called the "unevictable" list and an associated page flag, PG_unevictable, to
|
||||||
|
indicate that the page is being managed on the unevictable list. The
|
||||||
|
PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active
|
||||||
|
flag in that it indicates on which LRU list a page resides when PG_lru is set.
|
||||||
|
The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU
|
||||||
|
Kconfig option.
|
||||||
|
|
||||||
|
The Unevictable LRU infrastructure maintains unevictable pages on an additional
|
||||||
|
LRU list for a few reasons:
|
||||||
|
|
||||||
|
1) We get to "treat unevictable pages just like we treat other pages in the
|
||||||
|
system, which means we get to use the same code to manipulate them, the
|
||||||
|
same code to isolate them (for migrate, etc.), the same code to keep track
|
||||||
|
of the statistics, etc..." [Rik van Riel]
|
||||||
|
|
||||||
|
2) We want to be able to migrate unevictable pages between nodes--for memory
|
||||||
|
defragmentation, workload management and memory hotplug. The linux kernel
|
||||||
|
can only migrate pages that it can successfully isolate from the lru lists.
|
||||||
|
If we were to maintain pages elsewise than on an lru-like list, where they
|
||||||
|
can be found by isolate_lru_page(), we would prevent their migration, unless
|
||||||
|
we reworked migration code to find the unevictable pages.
|
||||||
|
|
||||||
|
|
||||||
|
The unevictable LRU list does not differentiate between file backed and swap
|
||||||
|
backed [anon] pages. This differentiation is only important while the pages
|
||||||
|
are, in fact, evictable.
|
||||||
|
|
||||||
|
The unevictable LRU list benefits from the "arrayification" of the per-zone
|
||||||
|
LRU lists and statistics originally proposed and posted by Christoph Lameter.
|
||||||
|
|
||||||
|
The unevictable list does not use the lru pagevec mechanism. Rather,
|
||||||
|
unevictable pages are placed directly on the page's zone's unevictable
|
||||||
|
list under the zone lru_lock. The reason for this is to prevent stranding
|
||||||
|
of pages on the unevictable list when one task has the page isolated from the
|
||||||
|
lru and other tasks are changing the "evictability" state of the page.
|
||||||
|
|
||||||
|
|
||||||
|
Unevictable LRU and Memory Controller Interaction
|
||||||
|
|
||||||
|
The memory controller data structure automatically gets a per zone unevictable
|
||||||
|
lru list as a result of the "arrayification" of the per-zone LRU lists. The
|
||||||
|
memory controller tracks the movement of pages to and from the unevictable list.
|
||||||
|
When a memory control group comes under memory pressure, the controller will
|
||||||
|
not attempt to reclaim pages on the unevictable list. This has a couple of
|
||||||
|
effects. Because the pages are "hidden" from reclaim on the unevictable list,
|
||||||
|
the reclaim process can be more efficient, dealing only with pages that have
|
||||||
|
a chance of being reclaimed. On the other hand, if too many of the pages
|
||||||
|
charged to the control group are unevictable, the evictable portion of the
|
||||||
|
working set of the tasks in the control group may not fit into the available
|
||||||
|
memory. This can cause the control group to thrash or to oom-kill tasks.
|
||||||
|
|
||||||
|
|
||||||
|
Unevictable LRU: Detecting Unevictable Pages
|
||||||
|
|
||||||
|
The function page_evictable(page, vma) in vmscan.c determines whether a
|
||||||
|
page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions,
|
||||||
|
page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's
|
||||||
|
address space using a wrapper function. Wrapper functions are used to set,
|
||||||
|
clear and test the flag to reduce the requirement for #ifdef's throughout the
|
||||||
|
source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created.
|
||||||
|
This flag remains for the life of the inode.
|
||||||
|
|
||||||
|
For shared memory regions, AS_UNEVICTABLE is set when an application
|
||||||
|
successfully SHM_LOCKs the region and is removed when the region is
|
||||||
|
SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page
|
||||||
|
tables for the region as does, for example, mlock(). So, we make no special
|
||||||
|
effort to push any pages in the SHM_LOCKed region to the unevictable list.
|
||||||
|
Vmscan will do this when/if it encounters the pages during reclaim. On
|
||||||
|
SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the
|
||||||
|
unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed
|
||||||
|
region is destroyed, the pages are also "rescued" from the unevictable list in
|
||||||
|
the process of freeing them.
|
||||||
|
|
||||||
|
page_evictable() detects mlock()ed pages by testing an additional page flag,
|
||||||
|
PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a
|
||||||
|
non-NULL vma is supplied, page_evictable() will check whether the vma is
|
||||||
|
VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and
|
||||||
|
update the appropriate statistics if the vma is VM_LOCKED. This method allows
|
||||||
|
efficient "culling" of pages in the fault path that are being faulted in to
|
||||||
|
VM_LOCKED vmas.
|
||||||
|
|
||||||
|
|
||||||
|
Unevictable Pages and Vmscan [shrink_*_list()]
|
||||||
|
|
||||||
|
If unevictable pages are culled in the fault path, or moved to the unevictable
|
||||||
|
list at mlock() or mmap() time, vmscan will never encounter the pages until
|
||||||
|
they have become evictable again, for example, via munlock() and have been
|
||||||
|
"rescued" from the unevictable list. However, there may be situations where we
|
||||||
|
decide, for the sake of expediency, to leave an unevictable page on one of the
|
||||||
|
regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for
|
||||||
|
such pages in all of the shrink_{active|inactive|page}_list() functions and
|
||||||
|
will "cull" such pages that it encounters--that is, it diverts those pages to
|
||||||
|
the unevictable list for the zone being scanned.
|
||||||
|
|
||||||
|
There may be situations where a page is mapped into a VM_LOCKED vma, but the
|
||||||
|
page is not marked as PageMlocked. Such pages will make it all the way to
|
||||||
|
shrink_page_list() where they will be detected when vmscan walks the reverse
|
||||||
|
map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list()
|
||||||
|
will cull the page at that point.
|
||||||
|
|
||||||
|
Note that for anonymous pages, shrink_page_list() attempts to add the page to
|
||||||
|
the swap cache before it tries to unmap the page. To avoid this unnecessary
|
||||||
|
consumption of swap space, shrink_page_list() calls try_to_munlock() to check
|
||||||
|
whether any VM_LOCKED vmas map the page without attempting to unmap the page.
|
||||||
|
If try_to_munlock() returns SWAP_MLOCK, shrink_page_list() will cull the page
|
||||||
|
without consuming swap space. try_to_munlock() will be described below.
|
||||||
|
|
||||||
|
To "cull" an unevictable page, vmscan simply puts the page back on the lru
|
||||||
|
list using putback_lru_page()--the inverse operation to isolate_lru_page()--
|
||||||
|
after dropping the page lock. Because the condition which makes the page
|
||||||
|
unevictable may change once the page is unlocked, putback_lru_page() will
|
||||||
|
recheck the unevictable state of a page that it places on the unevictable lru
|
||||||
|
list. If the page has become evictable, putback_lru_page() removes it from
|
||||||
|
the list and retries, including the page_evictable() test. Because such a
|
||||||
|
race is a rare event and movement of pages onto the unevictable list should be
|
||||||
|
rare, these extra evictability checks should not occur in the majority of calls
|
||||||
|
to putback_lru_page().
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Page: Prior Work
|
||||||
|
|
||||||
|
The "Unevictable Mlocked Pages" infrastructure is based on work originally
|
||||||
|
posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
|
||||||
|
Nick posted his patch as an alternative to a patch posted by Christoph
|
||||||
|
Lameter to achieve the same objective--hiding mlocked pages from vmscan.
|
||||||
|
In Nick's patch, he used one of the struct page lru list link fields as a count
|
||||||
|
of VM_LOCKED vmas that map the page. This use of the link field for a count
|
||||||
|
prevented the management of the pages on an LRU list. Thus, mlocked pages were
|
||||||
|
not migratable as isolate_lru_page() could not find them and the lru list link
|
||||||
|
field was not available to the migration subsystem. Nick resolved this by
|
||||||
|
putting mlocked pages back on the lru list before attempting to isolate them,
|
||||||
|
thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated
|
||||||
|
with the Unevictable LRU work, the count was replaced by walking the reverse
|
||||||
|
map to determine whether any VM_LOCKED vmas mapped the page. More on this
|
||||||
|
below.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: Basic Management
|
||||||
|
|
||||||
|
Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of
|
||||||
|
unevictable pages. When such a page has been "noticed" by the memory
|
||||||
|
management subsystem, the page is marked with the PG_mlocked [PageMlocked()]
|
||||||
|
flag. A PageMlocked() page will be placed on the unevictable LRU list when
|
||||||
|
it is added to the LRU. Pages can be "noticed" by memory management in
|
||||||
|
several places:
|
||||||
|
|
||||||
|
1) in the mlock()/mlockall() system call handlers.
|
||||||
|
2) in the mmap() system call handler when mmap()ing a region with the
|
||||||
|
MAP_LOCKED flag, or mmap()ing a region in a task that has called
|
||||||
|
mlockall() with the MCL_FUTURE flag. Both of these conditions result
|
||||||
|
in the VM_LOCKED flag being set for the vma.
|
||||||
|
3) in the fault path, if mlocked pages are "culled" in the fault path,
|
||||||
|
and when a VM_LOCKED stack segment is expanded.
|
||||||
|
4) as mentioned above, in vmscan:shrink_page_list() when attempting to
|
||||||
|
reclaim a page in a VM_LOCKED vma--via try_to_unmap() or try_to_munlock().
|
||||||
|
|
||||||
|
Mlocked pages become unlocked and rescued from the unevictable list when:
|
||||||
|
|
||||||
|
1) mapped in a range unlocked via the munlock()/munlockall() system calls.
|
||||||
|
2) munmapped() out of the last VM_LOCKED vma that maps the page, including
|
||||||
|
unmapping at task exit.
|
||||||
|
3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file.
|
||||||
|
4) before a page is COWed in a VM_LOCKED vma.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: mlock()/mlockall() System Call Handling
|
||||||
|
|
||||||
|
Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
|
||||||
|
for each vma in the range specified by the call. In the case of mlockall(),
|
||||||
|
this is the entire active address space of the task. Note that mlock_fixup()
|
||||||
|
is used for both mlock()ing and munlock()ing a range of memory. A call to
|
||||||
|
mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED
|
||||||
|
is treated as a no-op--mlock_fixup() simply returns.
|
||||||
|
|
||||||
|
If the vma passes some filtering described in "Mlocked Pages: Filtering Special Vmas"
|
||||||
|
below, mlock_fixup() will attempt to merge the vma with its neighbors or split
|
||||||
|
off a subset of the vma if the range does not cover the entire vma. Once the
|
||||||
|
vma has been merged or split or neither, mlock_fixup() will call
|
||||||
|
__mlock_vma_pages_range() to fault in the pages via get_user_pages() and
|
||||||
|
to mark the pages as mlocked via mlock_vma_page().
|
||||||
|
|
||||||
|
Note that the vma being mlocked might be mapped with PROT_NONE. In this case,
|
||||||
|
get_user_pages() will be unable to fault in the pages. That's OK. If pages
|
||||||
|
do end up getting faulted into this VM_LOCKED vma, we'll handle them in the
|
||||||
|
fault path or in vmscan.
|
||||||
|
|
||||||
|
Also note that a page returned by get_user_pages() could be truncated or
|
||||||
|
migrated out from under us, while we're trying to mlock it. To detect
|
||||||
|
this, __mlock_vma_pages_range() tests the page_mapping after acquiring
|
||||||
|
the page lock. If the page is still associated with its mapping, we'll
|
||||||
|
go ahead and call mlock_vma_page(). If the mapping is gone, we just
|
||||||
|
unlock the page and move on. Worst case, this results in a page mapped
|
||||||
|
in a VM_LOCKED vma remaining on a normal LRU list without being
|
||||||
|
PageMlocked(). Again, vmscan will detect and cull such pages.
|
||||||
|
|
||||||
|
mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will
|
||||||
|
TestSetPageMlocked() for each page returned by get_user_pages(). We use
|
||||||
|
TestSetPageMlocked() because the page might already be mlocked by another
|
||||||
|
task/vma and we don't want to do extra work. We especially do not want to
|
||||||
|
count an mlocked page more than once in the statistics. If the page was
|
||||||
|
already mlocked, mlock_vma_page() is done.
|
||||||
|
|
||||||
|
If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
|
||||||
|
page from the LRU, as it is likely on the appropriate active or inactive list
|
||||||
|
at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will
|
||||||
|
putback the page--putback_lru_page()--which will notice that the page is now
|
||||||
|
mlocked and divert the page to the zone's unevictable LRU list. If
|
||||||
|
mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
|
||||||
|
it later if/when it attempts to reclaim the page.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: Filtering Special Vmas
|
||||||
|
|
||||||
|
mlock_fixup() filters several classes of "special" vmas:
|
||||||
|
|
||||||
|
1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind
|
||||||
|
these mappings are inherently pinned, so we don't need to mark them as
|
||||||
|
mlocked. In any case, most of the pages have no struct page in which to
|
||||||
|
so mark the page. Because of this, get_user_pages() will fail for these
|
||||||
|
vmas, so there is no sense in attempting to visit them.
|
||||||
|
|
||||||
|
2) vmas mapping hugetlbfs pages are already effectively pinned into memory.
|
||||||
|
We don't need nor want to mlock() these pages. However, to preserve the
|
||||||
|
prior behavior of mlock()--before the unevictable/mlock changes--mlock_fixup()
|
||||||
|
will call make_pages_present() in the hugetlbfs vma range to allocate the
|
||||||
|
huge pages and populate the ptes.
|
||||||
|
|
||||||
|
3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of
|
||||||
|
kernel pages, such as the vdso page, relay channel pages, etc. These pages
|
||||||
|
are inherently unevictable and are not managed on the LRU lists.
|
||||||
|
mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls
|
||||||
|
make_pages_present() to populate the ptes.
|
||||||
|
|
||||||
|
Note that for all of these special vmas, mlock_fixup() does not set the
|
||||||
|
VM_LOCKED flag. Therefore, we won't have to deal with them later during
|
||||||
|
munlock() or munmap()--for example, at task exit. Neither does mlock_fixup()
|
||||||
|
account these vmas against the task's "locked_vm".
|
||||||
|
|
||||||
|
Mlocked Pages: Downgrading the Mmap Semaphore.
|
||||||
|
|
||||||
|
mlock_fixup() must be called with the mmap semaphore held for write, because
|
||||||
|
it may have to merge or split vmas. However, mlocking a large region of
|
||||||
|
memory can take a long time--especially if vmscan must reclaim pages to
|
||||||
|
satisfy the region's requirements. Faulting in a large region with the mmap
|
||||||
|
semaphore held for write can hold off other faults on the address space, in
|
||||||
|
the case of a multi-threaded task. It can also hold off scans of the task's
|
||||||
|
address space via /proc. While testing under heavy load, it was observed that
|
||||||
|
the ps(1) command could be held off for many minutes while a large segment was
|
||||||
|
mlock()ed down.
|
||||||
|
|
||||||
|
To address this issue, and to make the system more responsive during mlock()ing
|
||||||
|
of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
|
||||||
|
during the call to __mlock_vma_pages_range(). This works fine. However, the
|
||||||
|
callers of mlock_fixup() expect the semaphore to be returned in write mode.
|
||||||
|
So, mlock_fixup() "upgrades" the semaphore to write mode. Linux does not
|
||||||
|
support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
|
||||||
|
and reacquire it in write mode. In a multi-threaded task, it is possible for
|
||||||
|
the task memory map to change while the semaphore is dropped. Therefore,
|
||||||
|
mlock_fixup() looks up the vma at the range start address after reacquiring
|
||||||
|
the semaphore in write mode and verifies that it still covers the original
|
||||||
|
range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
|
||||||
|
mlock_fixup() have been changed to deal with this new error condition.
|
||||||
|
|
||||||
|
Note: when munlocking a region, all of the pages should already be resident--
|
||||||
|
unless we have racing threads mlocking() and munlocking() regions. So,
|
||||||
|
unlocking should not have to wait for page allocations nor faults of any kind.
|
||||||
|
Therefore mlock_fixup() does not downgrade the semaphore for munlock().
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: munlock()/munlockall() System Call Handling
|
||||||
|
|
||||||
|
The munlock() and munlockall() system calls are handled by the same functions--
|
||||||
|
do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
|
||||||
|
vs lock operation indicated by an argument. So, these system calls are also
|
||||||
|
handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
|
||||||
|
mlock_fixup() simply returns. Because of the vma filtering discussed above,
|
||||||
|
VM_LOCKED will not be set in any "special" vmas. So, these vmas will be
|
||||||
|
ignored for munlock.
|
||||||
|
|
||||||
|
If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off
|
||||||
|
the specified range. The range is then munlocked via the function
|
||||||
|
__mlock_vma_pages_range()--the same function used to mlock a vma range--
|
||||||
|
passing a flag to indicate that munlock() is being performed.
|
||||||
|
|
||||||
|
Because the vma access protections could have been changed to PROT_NONE after
|
||||||
|
faulting in and mlocking some pages, get_user_pages() was unreliable for visiting
|
||||||
|
these pages for munlocking. Because we don't want to leave pages mlocked(),
|
||||||
|
get_user_pages() was enhanced to accept a flag to ignore the permissions when
|
||||||
|
fetching the pages--all of which should be resident as a result of previous
|
||||||
|
mlock()ing.
|
||||||
|
|
||||||
|
For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
|
||||||
|
munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
|
||||||
|
flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page()
|
||||||
|
uses the Test*PageMlocked() function to handle the case where the page might
|
||||||
|
have already been unlocked by another task. If the page was mlocked,
|
||||||
|
munlock_vma_page() updates the zone statistics for the number of mlocked
|
||||||
|
pages. Note, however, that at this point we haven't checked whether the page
|
||||||
|
is mapped by other VM_LOCKED vmas.
|
||||||
|
|
||||||
|
We can't call try_to_munlock(), the function that walks the reverse map to check
|
||||||
|
for other VM_LOCKED vmas, without first isolating the page from the LRU.
|
||||||
|
try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
|
||||||
|
not be on an lru list. [More on these below.] However, the call to
|
||||||
|
isolate_lru_page() could fail, in which case we couldn't try_to_munlock().
|
||||||
|
So, we go ahead and clear PG_mlocked up front, as this might be the only chance
|
||||||
|
we have. If we can successfully isolate the page, we go ahead and
|
||||||
|
try_to_munlock(), which will restore the PG_mlocked flag and update the zone
|
||||||
|
page statistics if it finds another vma holding the page mlocked. If we fail
|
||||||
|
to isolate the page, we'll have left a potentially mlocked page on the LRU.
|
||||||
|
This is fine, because we'll catch it later when/if vmscan tries to reclaim the
|
||||||
|
page. This should be relatively rare.
|
||||||
|
|
||||||
|
Mlocked Pages: Migrating Them...
|
||||||
|
|
||||||
|
A page that is being migrated has been isolated from the lru lists and is
|
||||||
|
held locked across unmapping of the page, updating the page's mapping
|
||||||
|
[address_space] entry and copying the contents and state, until the
|
||||||
|
page table entry has been replaced with an entry that refers to the new
|
||||||
|
page. Linux supports migration of mlocked pages and other unevictable
|
||||||
|
pages. This involves simply moving the PageMlocked and PageUnevictable states
|
||||||
|
from the old page to the new page.
|
||||||
|
|
||||||
|
Note that page migration can race with mlocking or munlocking of the same
|
||||||
|
page. This has been discussed from the mlock/munlock perspective in the
|
||||||
|
respective sections above. Both processes [migration, m[un]locking], hold
|
||||||
|
the page locked. This provides the first level of synchronization. Page
|
||||||
|
migration zeros out the page_mapping of the old page before unlocking it,
|
||||||
|
so m[un]lock can skip these pages by testing the page mapping under page
|
||||||
|
lock.
|
||||||
|
|
||||||
|
When completing page migration, we place the new and old pages back onto the
|
||||||
|
lru after dropping the page lock. The "unneeded" page--old page on success,
|
||||||
|
new page on failure--will be freed when the reference count held by the
|
||||||
|
migration process is released. To ensure that we don't strand pages on the
|
||||||
|
unevictable list because of a race between munlock and migration, page
|
||||||
|
migration uses the putback_lru_page() function to add migrated pages back to
|
||||||
|
the lru.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: mmap(MAP_LOCKED) System Call Handling
|
||||||
|
|
||||||
|
In addition to the mlock()/mlockall() system calls, an application can request
|
||||||
|
that a region of memory be mlocked using the MAP_LOCKED flag with the mmap()
|
||||||
|
call. Furthermore, any mmap() call or brk() call that expands the heap by a
|
||||||
|
task that has previously called mlockall() with the MCL_FUTURE flag will result
|
||||||
|
in the newly mapped memory being mlocked. Before the unevictable/mlock changes,
|
||||||
|
the kernel simply called make_pages_present() to allocate pages and populate
|
||||||
|
the page table.
|
||||||
|
|
||||||
|
To mlock a range of memory under the unevictable/mlock infrastructure, the
|
||||||
|
mmap() handler and task address space expansion functions call
|
||||||
|
mlock_vma_pages_range() specifying the vma and the address range to mlock.
|
||||||
|
mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in
|
||||||
|
"Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will
|
||||||
|
have already been set by the caller, in filtered vmas. Thus these vmas need
|
||||||
|
not be visited for munlock when the region is unmapped.
|
||||||
|
|
||||||
|
For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
|
||||||
|
fault/allocate the pages and mlock them. Again, like mlock_fixup(),
|
||||||
|
mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
|
||||||
|
attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore
|
||||||
|
back to write mode before returning.
|
||||||
|
|
||||||
|
The callers of mlock_vma_pages_range() will have already added the memory
|
||||||
|
range to be mlocked to the task's "locked_vm". To account for filtered vmas,
|
||||||
|
mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
|
||||||
|
callers then subtract a non-negative return value from the task's locked_vm.
|
||||||
|
A negative return value represents an error--for example, from get_user_pages()
|
||||||
|
attempting to fault in a vma with PROT_NONE access. In this case, we leave
|
||||||
|
the memory range accounted as locked_vm, as the protections could be changed
|
||||||
|
later and pages allocated into that region.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Pages: munmap()/exit()/exec() System Call Handling
|
||||||
|
|
||||||
|
When unmapping an mlocked region of memory, whether by an explicit call to
|
||||||
|
munmap() or via an internal unmap from exit() or exec() processing, we must
|
||||||
|
munlock the pages if we're removing the last VM_LOCKED vma that maps the pages.
|
||||||
|
Before the unevictable/mlock changes, mlocking did not mark the pages in any way,
|
||||||
|
so unmapping them required no processing.
|
||||||
|
|
||||||
|
To munlock a range of memory under the unevictable/mlock infrastructure, the
|
||||||
|
munmap() handler and task address space tear down function call
|
||||||
|
munlock_vma_pages_all(). The name reflects the observation that one always
|
||||||
|
specifies the entire vma range when munlock()ing during unmap of a region.
|
||||||
|
Because of the vma filtering when mlocking() regions, only "normal" vmas that
|
||||||
|
actually contain mlocked pages will be passed to munlock_vma_pages_all().
|
||||||
|
|
||||||
|
munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup()
|
||||||
|
for the munlock case, calls __munlock_vma_pages_range() to walk the page table
|
||||||
|
for the vma's memory range and munlock_vma_page() each resident page mapped by
|
||||||
|
the vma. This effectively munlocks the page, only if this is the last
|
||||||
|
VM_LOCKED vma that maps the page.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked Page: try_to_unmap()
|
||||||
|
|
||||||
|
[Note: the code changes represented by this section are really quite small
|
||||||
|
compared to the text to describe what is happening and why, and to discuss the
|
||||||
|
implications.]
|
||||||
|
|
||||||
|
Pages can, of course, be mapped into multiple vmas. Some of these vmas may
|
||||||
|
have VM_LOCKED flag set. It is possible for a page mapped into one or more
|
||||||
|
VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one
|
||||||
|
of the active or inactive LRU lists. This could happen if, for example, a
|
||||||
|
task in the process of munlock()ing the page could not isolate the page from
|
||||||
|
the LRU. As a result, vmscan/shrink_page_list() might encounter such a page
|
||||||
|
as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To
|
||||||
|
handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED
|
||||||
|
vmas while it is walking a page's reverse map.
|
||||||
|
|
||||||
|
try_to_unmap() is always called, by either vmscan for reclaim or for page
|
||||||
|
migration, with the argument page locked and isolated from the LRU. BUG_ON()
|
||||||
|
assertions enforce this requirement. Separate functions handle anonymous and
|
||||||
|
mapped file pages, as these types of pages have different reverse map
|
||||||
|
mechanisms.
|
||||||
|
|
||||||
|
try_to_unmap_anon()
|
||||||
|
|
||||||
|
To unmap anonymous pages, each vma in the list anchored in the anon_vma must be
|
||||||
|
visited--at least until a VM_LOCKED vma is encountered. If the page is being
|
||||||
|
unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked
|
||||||
|
pages are migratable. However, for reclaim, if the page is mapped into a
|
||||||
|
VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap
|
||||||
|
semaphore of the mm_struct to which the vma belongs in read mode. If this is
|
||||||
|
successful, try_to_unmap() will mlock the page via mlock_vma_page()--we
|
||||||
|
wouldn't have gotten to try_to_unmap() if the page were already mlocked--and
|
||||||
|
will return SWAP_MLOCK, indicating that the page is unevictable. If the
|
||||||
|
mmap semaphore cannot be acquired, we are not sure whether the page is really
|
||||||
|
unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN.
|
||||||
|
|
||||||
|
try_to_unmap_file() -- linear mappings
|
||||||
|
|
||||||
|
Unmapping of a mapped file page works the same, except that the scan visits
|
||||||
|
all vmas that map the page's index/page offset in the page's mapping's
|
||||||
|
reverse map priority search tree. It must also visit each vma in the page's
|
||||||
|
mapping's non-linear list, if the list is non-empty. As for anonymous pages,
|
||||||
|
on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will
|
||||||
|
attempt to acquire the associated mm_struct's mmap semaphore to mlock the page,
|
||||||
|
returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not.
|
||||||
|
|
||||||
|
try_to_unmap_file() -- non-linear mappings
|
||||||
|
|
||||||
|
If a page's mapping contains a non-empty non-linear mapping vma list, then
|
||||||
|
try_to_un{map|lock}() must also visit each vma in that list to determine
|
||||||
|
whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit
|
||||||
|
all vmas in the non-linear list to ensure that the page is not/should not be
|
||||||
|
mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate.
|
||||||
|
However, there is no easy way to determine whether the page is actually mapped
|
||||||
|
in a given vma--either for unmapping or testing whether the VM_LOCKED vma
|
||||||
|
actually pins the page.
|
||||||
|
|
||||||
|
So, try_to_unmap_file() handles non-linear mappings by scanning a certain
|
||||||
|
number of pages--a "cluster"--in each non-linear vma associated with the page's
|
||||||
|
mapping, for each file mapped page that vmscan tries to unmap. If this happens
|
||||||
|
to unmap the page we're trying to unmap, try_to_unmap() will notice this on
|
||||||
|
return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it
|
||||||
|
will return SWAP_AGAIN, causing vmscan to recirculate this page. We take
|
||||||
|
advantage of the cluster scan in try_to_unmap_cluster() as follows:
|
||||||
|
|
||||||
|
For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap
|
||||||
|
semaphore of the associated mm_struct for read without blocking. If this
|
||||||
|
attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will
|
||||||
|
retain the mmap semaphore for the scan; otherwise it drops it here. Then,
|
||||||
|
for each page in the cluster, if we're holding the mmap semaphore for a locked
|
||||||
|
vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This
|
||||||
|
call is a no-op if the page is already locked, but will mlock any pages in
|
||||||
|
the non-linear mapping that happen to be unlocked. If one of the pages so
|
||||||
|
mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will
|
||||||
|
return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan
|
||||||
|
to cull the page, rather than recirculating it on the inactive list. Again,
|
||||||
|
if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns
|
||||||
|
SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but
|
||||||
|
couldn't be mlocked.
|
||||||
|
|
||||||
|
|
||||||
|
Mlocked pages: try_to_munlock() Reverse Map Scan
|
||||||
|
|
||||||
|
TODO/FIXME: a better name might be page_mlocked()--analogous to the
|
||||||
|
page_referenced() reverse map walker--especially if we continue to call this
|
||||||
|
from shrink_page_list(). See related TODO/FIXME below.
|
||||||
|
|
||||||
|
When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() System
|
||||||
|
Call Handling" above--tries to munlock a page, or when shrink_page_list()
|
||||||
|
encounters an anonymous page that is not yet in the swap cache, they need to
|
||||||
|
determine whether or not the page is mapped by any VM_LOCKED vma, without
|
||||||
|
actually attempting to unmap all ptes from the page. For this purpose, the
|
||||||
|
unevictable/mlock infrastructure introduced a variant of try_to_unmap() called
|
||||||
|
try_to_munlock().
|
||||||
|
|
||||||
|
try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
|
||||||
|
mapped file pages with an additional argument specifying unlock versus unmap
|
||||||
|
processing. Again, these functions walk the respective reverse maps looking
|
||||||
|
for VM_LOCKED vmas. When such a vma is found for anonymous pages and file
|
||||||
|
pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
|
||||||
|
attempt to acquire the associated mmap semaphore, mlock the page via
|
||||||
|
mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
|
||||||
|
pre-clearing of the page's PG_mlocked done by munlock_vma_page() and informs
|
||||||
|
shrink_page_list() that the anonymous page should be culled rather than added
|
||||||
|
to the swap cache in preparation for a try_to_unmap() that will almost
|
||||||
|
certainly fail.
|
||||||
|
|
||||||
|
If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap
|
||||||
|
semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list()
|
||||||
|
to recycle the page on the inactive list and hope that it has better luck
|
||||||
|
with the page next time.
|
||||||
|
|
||||||
|
For file pages mapped into non-linear vmas, the try_to_munlock() logic works
|
||||||
|
slightly differently. On encountering a VM_LOCKED non-linear vma that might
|
||||||
|
map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking
|
||||||
|
the page. munlock_vma_page() will just leave the page unlocked and let
|
||||||
|
vmscan deal with it--the usual fallback position.
|
||||||
|
|
||||||
|
Note that try_to_munlock()'s reverse map walk must visit every vma in a page's
|
||||||
|
reverse map to determine that a page is NOT mapped into any VM_LOCKED vma.
|
||||||
|
However, the scan can terminate when it encounters a VM_LOCKED vma and can
|
||||||
|
successfully acquire the vma's mmap semaphore for read and mlock the page.
|
||||||
|
Although try_to_munlock() can be called many [very many!] times when
|
||||||
|
munlock()ing a large region or tearing down a large address space that has been
|
||||||
|
mlocked via mlockall(), overall this is a fairly rare event. In addition,
|
||||||
|
although shrink_page_list() calls try_to_munlock() for every anonymous page that
|
||||||
|
it handles that is not yet in the swap cache, on average anonymous pages will
|
||||||
|
have very short reverse map lists.
|
||||||
|
|
||||||
|
Mlocked Page: Page Reclaim in shrink_*_list()
|
||||||
|
|
||||||
|
shrink_active_list() culls any obviously unevictable pages--i.e.,
|
||||||
|
!page_evictable(page, NULL)--diverting these to the unevictable lru
|
||||||
|
list. However, shrink_active_list() only sees unevictable pages that
|
||||||
|
made it onto the active/inactive lru lists. Note that these pages do not
|
||||||
|
have PageUnevictable set--otherwise, they would be on the unevictable list and
|
||||||
|
shrink_active_list would never see them.
|
||||||
|
|
||||||
|
Some examples of these unevictable pages on the LRU lists are:
|
||||||
|
|
||||||
|
1) ramfs pages that have been placed on the lru lists when first allocated.
|
||||||
|
|
||||||
|
2) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to
|
||||||
|
allocate or fault in the pages in the shared memory region. This happens
|
||||||
|
when an application accesses the page the first time after SHM_LOCKing
|
||||||
|
the segment.
|
||||||
|
|
||||||
|
3) Mlocked pages that could not be isolated from the lru and moved to the
|
||||||
|
unevictable list in mlock_vma_page().
|
||||||
|
|
||||||
|
4) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't
|
||||||
|
acquire the vma's mmap semaphore to test the flags and set PageMlocked.
|
||||||
|
munlock_vma_page() was forced to let the page back on to the normal
|
||||||
|
LRU list for vmscan to handle.
|
||||||
|
|
||||||
|
shrink_inactive_list() also culls any unevictable pages that it finds
|
||||||
|
on the inactive lists, again diverting them to the appropriate zone's unevictable
|
||||||
|
lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became
|
||||||
|
SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or
|
||||||
|
pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from
|
||||||
|
the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice
|
||||||
|
the latter, but will pass on to shrink_page_list().
|
||||||
|
|
||||||
|
shrink_page_list() again culls obviously unevictable pages that it could
|
||||||
|
encounter for similar reasons to shrink_inactive_list(). As already discussed,
|
||||||
|
shrink_page_list() proactively looks for anonymous pages that should have
|
||||||
|
PG_mlocked set but don't--these would not be detected by page_evictable()--to
|
||||||
|
avoid adding them to the swap cache unnecessarily. File pages mapped into
|
||||||
|
VM_LOCKED vmas but without PG_mlocked set will make it all the way to
|
||||||
|
try_to_unmap(). shrink_page_list() will divert them to the unevictable list when
|
||||||
|
try_to_unmap() returns SWAP_MLOCK, as discussed above.
|
||||||
|
|
||||||
|
TODO/FIXME: If we can enhance the swap cache to reliably remove entries
|
||||||
|
with page_count(page) > 2, as long as all ptes are mapped to the page and
|
||||||
|
not the swap entry, we can probably remove the call to try_to_munlock() in
|
||||||
|
shrink_page_list() and just remove the page from the swap cache when
|
||||||
|
try_to_unmap() returns SWAP_MLOCK. Currently, remove_exclusive_swap_page()
|
||||||
|
doesn't seem to allow that.
|
||||||
|
|
||||||
|
|
|
@ -1198,7 +1198,7 @@ S: Maintained
|
||||||
|
|
||||||
CPU FREQUENCY DRIVERS
|
CPU FREQUENCY DRIVERS
|
||||||
P: Dave Jones
|
P: Dave Jones
|
||||||
M: davej@codemonkey.org.uk
|
M: davej@redhat.com
|
||||||
L: cpufreq@vger.kernel.org
|
L: cpufreq@vger.kernel.org
|
||||||
W: http://www.codemonkey.org.uk/projects/cpufreq/
|
W: http://www.codemonkey.org.uk/projects/cpufreq/
|
||||||
T: git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
|
T: git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
|
||||||
|
|
|
@ -70,6 +70,7 @@ config AUTO_IRQ_AFFINITY
|
||||||
default y
|
default y
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
|
|
||||||
menu "System setup"
|
menu "System setup"
|
||||||
|
|
|
@ -74,12 +74,14 @@ register struct thread_info *__current_thread_info __asm__("$8");
|
||||||
#define TIF_UAC_SIGBUS 7
|
#define TIF_UAC_SIGBUS 7
|
||||||
#define TIF_MEMDIE 8
|
#define TIF_MEMDIE 8
|
||||||
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
|
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
|
||||||
|
#define TIF_FREEZE 16 /* is freezing for suspend */
|
||||||
|
|
||||||
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
||||||
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
||||||
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
||||||
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
||||||
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
||||||
|
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
||||||
|
|
||||||
/* Work to do on interrupt/exception return. */
|
/* Work to do on interrupt/exception return. */
|
||||||
#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
|
#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
|
||||||
|
|
|
@ -655,7 +655,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
|
||||||
|
|
||||||
case 0x71: /* RTC_PORT(1) */
|
case 0x71: /* RTC_PORT(1) */
|
||||||
rtc_access.index = index;
|
rtc_access.index = index;
|
||||||
rtc_access.data = BCD_TO_BIN(b);
|
rtc_access.data = bcd2bin(b);
|
||||||
rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */
|
rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -668,7 +668,7 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write)
|
||||||
#else
|
#else
|
||||||
__marvel_access_rtc(&rtc_access);
|
__marvel_access_rtc(&rtc_access);
|
||||||
#endif
|
#endif
|
||||||
ret = BIN_TO_BCD(rtc_access.data);
|
ret = bin2bcd(rtc_access.data);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
|
||||||
|
|
||||||
static irq_swizzle_t *sable_lynx_irq_swizzle;
|
static irq_swizzle_t *sable_lynx_irq_swizzle;
|
||||||
|
|
||||||
static void sable_lynx_init_irq(int nr_irqs);
|
static void sable_lynx_init_irq(int nr_of_irqs);
|
||||||
|
|
||||||
#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
|
#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
|
||||||
|
|
||||||
|
@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init
|
static void __init
|
||||||
sable_lynx_init_irq(int nr_irqs)
|
sable_lynx_init_irq(int nr_of_irqs)
|
||||||
{
|
{
|
||||||
long i;
|
long i;
|
||||||
|
|
||||||
for (i = 0; i < nr_irqs; ++i) {
|
for (i = 0; i < nr_of_irqs; ++i) {
|
||||||
irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
|
irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
|
||||||
irq_desc[i].chip = &sable_lynx_irq_type;
|
irq_desc[i].chip = &sable_lynx_irq_type;
|
||||||
}
|
}
|
||||||
|
|
|
@ -346,12 +346,12 @@ time_init(void)
|
||||||
year = CMOS_READ(RTC_YEAR);
|
year = CMOS_READ(RTC_YEAR);
|
||||||
|
|
||||||
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
BCD_TO_BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
BCD_TO_BIN(min);
|
min = bcd2bin(min);
|
||||||
BCD_TO_BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
BCD_TO_BIN(day);
|
day = bcd2bin(day);
|
||||||
BCD_TO_BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
BCD_TO_BIN(year);
|
year = bcd2bin(year);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* PC-like is standard; used for year >= 70 */
|
/* PC-like is standard; used for year >= 70 */
|
||||||
|
@ -525,7 +525,7 @@ set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
||||||
BCD_TO_BIN(cmos_minutes);
|
cmos_minutes = bcd2bin(cmos_minutes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* since we're only adjusting minutes and seconds,
|
* since we're only adjusting minutes and seconds,
|
||||||
|
@ -543,8 +543,8 @@ set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
if (abs(real_minutes - cmos_minutes) < 30) {
|
if (abs(real_minutes - cmos_minutes) < 30) {
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
BIN_TO_BCD(real_seconds);
|
real_seconds = bin2bcd(real_seconds);
|
||||||
BIN_TO_BCD(real_minutes);
|
real_minutes = bin2bcd(real_minutes);
|
||||||
}
|
}
|
||||||
CMOS_WRITE(real_seconds,RTC_SECONDS);
|
CMOS_WRITE(real_seconds,RTC_SECONDS);
|
||||||
CMOS_WRITE(real_minutes,RTC_MINUTES);
|
CMOS_WRITE(real_minutes,RTC_MINUTES);
|
||||||
|
|
|
@ -192,6 +192,8 @@ config VECTORS_BASE
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "System Type"
|
menu "System Type"
|
||||||
|
|
||||||
choice
|
choice
|
||||||
|
|
|
@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void)
|
||||||
return 1200000000;
|
return 1200000000;
|
||||||
default:
|
default:
|
||||||
printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
|
printk("%s: warning unknown frequency, defaulting to 800Mhz\n",
|
||||||
__FUNCTION__);
|
__func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 800000000;
|
return 800000000;
|
||||||
|
@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void)
|
||||||
return 4;
|
return 4;
|
||||||
default:
|
default:
|
||||||
printk("%s: warning unknown ratio, defaulting to 2\n",
|
printk("%s: warning unknown ratio, defaulting to 2\n",
|
||||||
__FUNCTION__);
|
__func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
|
|
|
@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
|
||||||
.unmask = ixdp2x00_irq_unmask
|
.unmask = ixdp2x00_irq_unmask
|
||||||
};
|
};
|
||||||
|
|
||||||
void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
|
void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
|
||||||
{
|
{
|
||||||
unsigned int irq;
|
unsigned int irq;
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
|
||||||
|
|
||||||
board_irq_stat = stat_reg;
|
board_irq_stat = stat_reg;
|
||||||
board_irq_mask = mask_reg;
|
board_irq_mask = mask_reg;
|
||||||
board_irq_count = nr_irqs;
|
board_irq_count = nr_of_irqs;
|
||||||
|
|
||||||
*board_irq_mask = 0xffffffff;
|
*board_irq_mask = 0xffffffff;
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
|
||||||
|
|
||||||
void __init omap_init_irq(void)
|
void __init omap_init_irq(void)
|
||||||
{
|
{
|
||||||
unsigned long nr_irqs = 0;
|
unsigned long nr_of_irqs = 0;
|
||||||
unsigned int nr_banks = 0;
|
unsigned int nr_banks = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -133,14 +133,14 @@ void __init omap_init_irq(void)
|
||||||
|
|
||||||
omap_irq_bank_init_one(bank);
|
omap_irq_bank_init_one(bank);
|
||||||
|
|
||||||
nr_irqs += bank->nr_irqs;
|
nr_of_irqs += bank->nr_irqs;
|
||||||
nr_banks++;
|
nr_banks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
|
printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
|
||||||
nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
|
nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
|
||||||
|
|
||||||
for (i = 0; i < nr_irqs; i++) {
|
for (i = 0; i < nr_of_irqs; i++) {
|
||||||
set_irq_chip(i, &omap_irq_chip);
|
set_irq_chip(i, &omap_irq_chip);
|
||||||
set_irq_handler(i, handle_level_irq);
|
set_irq_handler(i, handle_level_irq);
|
||||||
set_irq_flags(i, IRQF_VALID);
|
set_irq_flags(i, IRQF_VALID);
|
||||||
|
|
|
@ -4,6 +4,43 @@
|
||||||
#include <linux/mtd/mtd.h>
|
#include <linux/mtd/mtd.h>
|
||||||
#include <linux/mtd/partitions.h>
|
#include <linux/mtd/partitions.h>
|
||||||
|
|
||||||
|
struct pxa3xx_nand_timing {
|
||||||
|
unsigned int tCH; /* Enable signal hold time */
|
||||||
|
unsigned int tCS; /* Enable signal setup time */
|
||||||
|
unsigned int tWH; /* ND_nWE high duration */
|
||||||
|
unsigned int tWP; /* ND_nWE pulse time */
|
||||||
|
unsigned int tRH; /* ND_nRE high duration */
|
||||||
|
unsigned int tRP; /* ND_nRE pulse width */
|
||||||
|
unsigned int tR; /* ND_nWE high to ND_nRE low for read */
|
||||||
|
unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */
|
||||||
|
unsigned int tAR; /* ND_ALE low to ND_nRE low delay */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pxa3xx_nand_cmdset {
|
||||||
|
uint16_t read1;
|
||||||
|
uint16_t read2;
|
||||||
|
uint16_t program;
|
||||||
|
uint16_t read_status;
|
||||||
|
uint16_t read_id;
|
||||||
|
uint16_t erase;
|
||||||
|
uint16_t reset;
|
||||||
|
uint16_t lock;
|
||||||
|
uint16_t unlock;
|
||||||
|
uint16_t lock_status;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pxa3xx_nand_flash {
|
||||||
|
const struct pxa3xx_nand_timing *timing; /* NAND Flash timing */
|
||||||
|
const struct pxa3xx_nand_cmdset *cmdset;
|
||||||
|
|
||||||
|
uint32_t page_per_block;/* Pages per block (PG_PER_BLK) */
|
||||||
|
uint32_t page_size; /* Page size in bytes (PAGE_SZ) */
|
||||||
|
uint32_t flash_width; /* Width of Flash memory (DWIDTH_M) */
|
||||||
|
uint32_t dfc_width; /* Width of flash controller(DWIDTH_C) */
|
||||||
|
uint32_t num_blocks; /* Number of physical blocks in Flash */
|
||||||
|
uint32_t chip_id;
|
||||||
|
};
|
||||||
|
|
||||||
struct pxa3xx_nand_platform_data {
|
struct pxa3xx_nand_platform_data {
|
||||||
|
|
||||||
/* the data flash bus is shared between the Static Memory
|
/* the data flash bus is shared between the Static Memory
|
||||||
|
@ -12,8 +49,11 @@ struct pxa3xx_nand_platform_data {
|
||||||
*/
|
*/
|
||||||
int enable_arbiter;
|
int enable_arbiter;
|
||||||
|
|
||||||
struct mtd_partition *parts;
|
const struct mtd_partition *parts;
|
||||||
unsigned int nr_parts;
|
unsigned int nr_parts;
|
||||||
|
|
||||||
|
const struct pxa3xx_nand_flash * flash;
|
||||||
|
size_t num_flash;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
|
extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info);
|
||||||
|
|
|
@ -59,8 +59,6 @@
|
||||||
* TC6393XB GPIOs
|
* TC6393XB GPIOs
|
||||||
*/
|
*/
|
||||||
#define TOSA_TC6393XB_GPIO_BASE (NR_BUILTIN_GPIO + 2 * 12)
|
#define TOSA_TC6393XB_GPIO_BASE (NR_BUILTIN_GPIO + 2 * 12)
|
||||||
#define TOSA_TC6393XB_GPIO(i) (TOSA_TC6393XB_GPIO_BASE + (i))
|
|
||||||
#define TOSA_TC6393XB_GPIO_BIT(gpio) (1 << (gpio - TOSA_TC6393XB_GPIO_BASE))
|
|
||||||
|
|
||||||
#define TOSA_GPIO_TG_ON (TOSA_TC6393XB_GPIO_BASE + 0)
|
#define TOSA_GPIO_TG_ON (TOSA_TC6393XB_GPIO_BASE + 0)
|
||||||
#define TOSA_GPIO_L_MUTE (TOSA_TC6393XB_GPIO_BASE + 1)
|
#define TOSA_GPIO_L_MUTE (TOSA_TC6393XB_GPIO_BASE + 1)
|
||||||
|
|
|
@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void);
|
||||||
static inline void zylonite_pxa300_init(void)
|
static inline void zylonite_pxa300_init(void)
|
||||||
{
|
{
|
||||||
if (cpu_is_pxa300() || cpu_is_pxa310())
|
if (cpu_is_pxa300() || cpu_is_pxa310())
|
||||||
panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__);
|
panic("%s: PXA300/PXA310 not supported\n", __func__);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void);
|
||||||
static inline void zylonite_pxa320_init(void)
|
static inline void zylonite_pxa320_init(void)
|
||||||
{
|
{
|
||||||
if (cpu_is_pxa320())
|
if (cpu_is_pxa320())
|
||||||
panic("%s: PXA320 not supported\n", __FUNCTION__);
|
panic("%s: PXA320 not supported\n", __func__);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -706,16 +706,39 @@ static struct tmio_nand_data tosa_tc6393xb_nand_config = {
|
||||||
.badblock_pattern = &tosa_tc6393xb_nand_bbt,
|
.badblock_pattern = &tosa_tc6393xb_nand_bbt,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct tc6393xb_platform_data tosa_tc6393xb_setup = {
|
static int tosa_tc6393xb_setup(struct platform_device *dev)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
rc = gpio_request(TOSA_GPIO_CARD_VCC_ON, "CARD_VCC_ON");
|
||||||
|
if (rc)
|
||||||
|
goto err_req;
|
||||||
|
|
||||||
|
rc = gpio_direction_output(TOSA_GPIO_CARD_VCC_ON, 1);
|
||||||
|
if (rc)
|
||||||
|
goto err_dir;
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
err_dir:
|
||||||
|
gpio_free(TOSA_GPIO_CARD_VCC_ON);
|
||||||
|
err_req:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void tosa_tc6393xb_teardown(struct platform_device *dev)
|
||||||
|
{
|
||||||
|
gpio_free(TOSA_GPIO_CARD_VCC_ON);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct tc6393xb_platform_data tosa_tc6393xb_data = {
|
||||||
.scr_pll2cr = 0x0cc1,
|
.scr_pll2cr = 0x0cc1,
|
||||||
.scr_gper = 0x3300,
|
.scr_gper = 0x3300,
|
||||||
.scr_gpo_dsr =
|
|
||||||
TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
|
|
||||||
.scr_gpo_doecr =
|
|
||||||
TOSA_TC6393XB_GPIO_BIT(TOSA_GPIO_CARD_VCC_ON),
|
|
||||||
|
|
||||||
.irq_base = IRQ_BOARD_START,
|
.irq_base = IRQ_BOARD_START,
|
||||||
.gpio_base = TOSA_TC6393XB_GPIO_BASE,
|
.gpio_base = TOSA_TC6393XB_GPIO_BASE,
|
||||||
|
.setup = tosa_tc6393xb_setup,
|
||||||
|
.teardown = tosa_tc6393xb_teardown,
|
||||||
|
|
||||||
.enable = tosa_tc6393xb_enable,
|
.enable = tosa_tc6393xb_enable,
|
||||||
.disable = tosa_tc6393xb_disable,
|
.disable = tosa_tc6393xb_disable,
|
||||||
|
@ -723,6 +746,8 @@ static struct tc6393xb_platform_data tosa_tc6393xb_setup = {
|
||||||
.resume = tosa_tc6393xb_resume,
|
.resume = tosa_tc6393xb_resume,
|
||||||
|
|
||||||
.nand_data = &tosa_tc6393xb_nand_config,
|
.nand_data = &tosa_tc6393xb_nand_config,
|
||||||
|
|
||||||
|
.resume_restore = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -730,7 +755,7 @@ static struct platform_device tc6393xb_device = {
|
||||||
.name = "tc6393xb",
|
.name = "tc6393xb",
|
||||||
.id = -1,
|
.id = -1,
|
||||||
.dev = {
|
.dev = {
|
||||||
.platform_data = &tosa_tc6393xb_setup,
|
.platform_data = &tosa_tc6393xb_data,
|
||||||
},
|
},
|
||||||
.num_resources = ARRAY_SIZE(tc6393xb_resources),
|
.num_resources = ARRAY_SIZE(tc6393xb_resources),
|
||||||
.resource = tc6393xb_resources,
|
.resource = tc6393xb_resources,
|
||||||
|
|
|
@ -1,75 +0,0 @@
|
||||||
/*
|
|
||||||
* arch/arm/mach-sa1100/include/mach/ide.h
|
|
||||||
*
|
|
||||||
* Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
|
|
||||||
*
|
|
||||||
* 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
|
|
||||||
* Get rid of the special ide_init_hwif_ports() functions
|
|
||||||
* and make a generalised function that can be used by all
|
|
||||||
* architectures.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <asm/irq.h>
|
|
||||||
#include <mach/hardware.h>
|
|
||||||
#include <asm/mach-types.h>
|
|
||||||
|
|
||||||
#error "This code is broken and needs update to match with current ide support"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set up a hw structure for a specified data port, control port and IRQ.
|
|
||||||
* This should follow whatever the default interface uses.
|
|
||||||
*/
|
|
||||||
static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
|
|
||||||
unsigned long ctrl_port, int *irq)
|
|
||||||
{
|
|
||||||
unsigned long reg = data_port;
|
|
||||||
int i;
|
|
||||||
int regincr = 1;
|
|
||||||
|
|
||||||
/* The Empeg board has the first two address lines unused */
|
|
||||||
if (machine_is_empeg())
|
|
||||||
regincr = 1 << 2;
|
|
||||||
|
|
||||||
/* The LART doesn't use A0 for IDE */
|
|
||||||
if (machine_is_lart())
|
|
||||||
regincr = 1 << 1;
|
|
||||||
|
|
||||||
memset(hw, 0, sizeof(*hw));
|
|
||||||
|
|
||||||
for (i = 0; i <= 7; i++) {
|
|
||||||
hw->io_ports_array[i] = reg;
|
|
||||||
reg += regincr;
|
|
||||||
}
|
|
||||||
|
|
||||||
hw->io_ports.ctl_addr = ctrl_port;
|
|
||||||
|
|
||||||
if (irq)
|
|
||||||
*irq = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This registers the standard ports for this architecture with the IDE
|
|
||||||
* driver.
|
|
||||||
*/
|
|
||||||
static __inline__ void
|
|
||||||
ide_init_default_hwifs(void)
|
|
||||||
{
|
|
||||||
if (machine_is_lart()) {
|
|
||||||
#ifdef CONFIG_SA1100_LART
|
|
||||||
hw_regs_t hw;
|
|
||||||
|
|
||||||
/* Enable GPIO as interrupt line */
|
|
||||||
GPDR &= ~LART_GPIO_IDE;
|
|
||||||
set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
|
|
||||||
|
|
||||||
/* set PCMCIA interface timing */
|
|
||||||
MECR = 0x00060006;
|
|
||||||
|
|
||||||
/* init the interface */
|
|
||||||
ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
|
|
||||||
hw.irq = LART_IRQ_IDE;
|
|
||||||
ide_register_hw(&hw);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
|
||||||
|
* Copyright 2008 Sascha Hauer, kernel@pengutronix.de
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
* MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ASM_ARCH_NAND_H
|
||||||
|
#define __ASM_ARCH_NAND_H
|
||||||
|
|
||||||
|
struct mxc_nand_platform_data {
|
||||||
|
int width; /* data bus width in bytes */
|
||||||
|
int hw_ecc; /* 0 to suppress hardware ECC */
|
||||||
|
};
|
||||||
|
#endif /* __ASM_ARCH_NAND_H */
|
|
@ -16,6 +16,10 @@ struct omap_onenand_platform_data {
|
||||||
int gpio_irq;
|
int gpio_irq;
|
||||||
struct mtd_partition *parts;
|
struct mtd_partition *parts;
|
||||||
int nr_parts;
|
int nr_parts;
|
||||||
int (*onenand_setup)(void __iomem *);
|
int (*onenand_setup)(void __iomem *, int freq);
|
||||||
int dma_channel;
|
int dma_channel;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
int omap2_onenand_rephase(void);
|
||||||
|
|
||||||
|
#define ONENAND_MAX_PARTITIONS 8
|
||||||
|
|
|
@ -72,6 +72,8 @@ config GENERIC_BUG
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "System Type and features"
|
menu "System Type and features"
|
||||||
|
|
||||||
source "kernel/time/Kconfig"
|
source "kernel/time/Kconfig"
|
||||||
|
|
|
@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void)
|
||||||
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
|
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
|
||||||
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
|
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
|
||||||
#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
|
#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
|
||||||
|
#define _TIF_FREEZE (1 << TIF_FREEZE)
|
||||||
|
|
||||||
/* Note: The masks below must never span more than 16 bits! */
|
/* Note: The masks below must never span more than 16 bits! */
|
||||||
|
|
||||||
|
|
|
@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
|
||||||
struct eic *eic;
|
struct eic *eic;
|
||||||
struct resource *regs;
|
struct resource *regs;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned int nr_irqs;
|
unsigned int nr_of_irqs;
|
||||||
unsigned int int_irq;
|
unsigned int int_irq;
|
||||||
int ret;
|
int ret;
|
||||||
u32 pattern;
|
u32 pattern;
|
||||||
|
@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
|
||||||
eic_writel(eic, IDR, ~0UL);
|
eic_writel(eic, IDR, ~0UL);
|
||||||
eic_writel(eic, MODE, ~0UL);
|
eic_writel(eic, MODE, ~0UL);
|
||||||
pattern = eic_readl(eic, MODE);
|
pattern = eic_readl(eic, MODE);
|
||||||
nr_irqs = fls(pattern);
|
nr_of_irqs = fls(pattern);
|
||||||
|
|
||||||
/* Trigger on low level unless overridden by driver */
|
/* Trigger on low level unless overridden by driver */
|
||||||
eic_writel(eic, EDGE, 0UL);
|
eic_writel(eic, EDGE, 0UL);
|
||||||
|
@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
|
||||||
|
|
||||||
eic->chip = &eic_chip;
|
eic->chip = &eic_chip;
|
||||||
|
|
||||||
for (i = 0; i < nr_irqs; i++) {
|
for (i = 0; i < nr_of_irqs; i++) {
|
||||||
set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
|
set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
|
||||||
handle_level_irq);
|
handle_level_irq);
|
||||||
set_irq_chip_data(eic->first_irq + i, eic);
|
set_irq_chip_data(eic->first_irq + i, eic);
|
||||||
|
@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
|
||||||
eic->regs, int_irq);
|
eic->regs, int_irq);
|
||||||
dev_info(&pdev->dev,
|
dev_info(&pdev->dev,
|
||||||
"Handling %u external IRQs, starting with IRQ %u\n",
|
"Handling %u external IRQs, starting with IRQ %u\n",
|
||||||
nr_irqs, eic->first_irq);
|
nr_of_irqs, eic->first_irq);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
|
@ -64,8 +64,11 @@ config HARDWARE_PM
|
||||||
depends on OPROFILE
|
depends on OPROFILE
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
source "kernel/Kconfig.preempt"
|
source "kernel/Kconfig.preempt"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "Blackfin Processor Options"
|
menu "Blackfin Processor Options"
|
||||||
|
|
||||||
comment "Processor and Board Settings"
|
comment "Processor and Board Settings"
|
||||||
|
|
|
@ -62,6 +62,8 @@ config HZ
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "General setup"
|
menu "General setup"
|
||||||
|
|
||||||
source "fs/Kconfig.binfmt"
|
source "fs/Kconfig.binfmt"
|
||||||
|
|
|
@ -215,12 +215,12 @@ get_rtc_time(struct rtc_time *rtc_tm)
|
||||||
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
|
|
||||||
BCD_TO_BIN(rtc_tm->tm_sec);
|
rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
|
||||||
BCD_TO_BIN(rtc_tm->tm_min);
|
rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
|
||||||
BCD_TO_BIN(rtc_tm->tm_hour);
|
rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
|
||||||
BCD_TO_BIN(rtc_tm->tm_mday);
|
rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
|
||||||
BCD_TO_BIN(rtc_tm->tm_mon);
|
rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
|
||||||
BCD_TO_BIN(rtc_tm->tm_year);
|
rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Account for differences between how the RTC uses the values
|
* Account for differences between how the RTC uses the values
|
||||||
|
@ -295,12 +295,12 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
|
||||||
else
|
else
|
||||||
yrs -= 1900; /* RTC (70, 71, ... 99) */
|
yrs -= 1900; /* RTC (70, 71, ... 99) */
|
||||||
|
|
||||||
BIN_TO_BCD(sec);
|
sec = bin2bcd(sec);
|
||||||
BIN_TO_BCD(min);
|
min = bin2bcd(min);
|
||||||
BIN_TO_BCD(hrs);
|
hrs = bin2bcd(hrs);
|
||||||
BIN_TO_BCD(day);
|
day = bin2bcd(day);
|
||||||
BIN_TO_BCD(mon);
|
mon = bin2bcd(mon);
|
||||||
BIN_TO_BCD(yrs);
|
yrs = bin2bcd(yrs);
|
||||||
|
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
CMOS_WRITE(yrs, RTC_YEAR);
|
CMOS_WRITE(yrs, RTC_YEAR);
|
||||||
|
|
|
@ -122,7 +122,7 @@ get_rtc_time(struct rtc_time *tm)
|
||||||
"information is no longer guaranteed!\n", PCF8563_NAME);
|
"information is no longer guaranteed!\n", PCF8563_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
tm->tm_year = BCD_TO_BIN(tm->tm_year) +
|
tm->tm_year = bcd2bin(tm->tm_year) +
|
||||||
((tm->tm_mon & 0x80) ? 100 : 0);
|
((tm->tm_mon & 0x80) ? 100 : 0);
|
||||||
tm->tm_sec &= 0x7F;
|
tm->tm_sec &= 0x7F;
|
||||||
tm->tm_min &= 0x7F;
|
tm->tm_min &= 0x7F;
|
||||||
|
@ -131,11 +131,11 @@ get_rtc_time(struct rtc_time *tm)
|
||||||
tm->tm_wday &= 0x07; /* Not coded in BCD. */
|
tm->tm_wday &= 0x07; /* Not coded in BCD. */
|
||||||
tm->tm_mon &= 0x1F;
|
tm->tm_mon &= 0x1F;
|
||||||
|
|
||||||
BCD_TO_BIN(tm->tm_sec);
|
tm->tm_sec = bcd2bin(tm->tm_sec);
|
||||||
BCD_TO_BIN(tm->tm_min);
|
tm->tm_min = bcd2bin(tm->tm_min);
|
||||||
BCD_TO_BIN(tm->tm_hour);
|
tm->tm_hour = bcd2bin(tm->tm_hour);
|
||||||
BCD_TO_BIN(tm->tm_mday);
|
tm->tm_mday = bcd2bin(tm->tm_mday);
|
||||||
BCD_TO_BIN(tm->tm_mon);
|
tm->tm_mon = bcd2bin(tm->tm_mon);
|
||||||
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
|
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,12 +282,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
|
||||||
century = (tm.tm_year >= 2000) ? 0x80 : 0;
|
century = (tm.tm_year >= 2000) ? 0x80 : 0;
|
||||||
tm.tm_year = tm.tm_year % 100;
|
tm.tm_year = tm.tm_year % 100;
|
||||||
|
|
||||||
BIN_TO_BCD(tm.tm_year);
|
tm.tm_year = bin2bcd(tm.tm_year);
|
||||||
BIN_TO_BCD(tm.tm_mon);
|
tm.tm_mon = bin2bcd(tm.tm_mon);
|
||||||
BIN_TO_BCD(tm.tm_mday);
|
tm.tm_mday = bin2bcd(tm.tm_mday);
|
||||||
BIN_TO_BCD(tm.tm_hour);
|
tm.tm_hour = bin2bcd(tm.tm_hour);
|
||||||
BIN_TO_BCD(tm.tm_min);
|
tm.tm_min = bin2bcd(tm.tm_min);
|
||||||
BIN_TO_BCD(tm.tm_sec);
|
tm.tm_sec = bin2bcd(tm.tm_sec);
|
||||||
tm.tm_mon |= century;
|
tm.tm_mon |= century;
|
||||||
|
|
||||||
mutex_lock(&rtc_lock);
|
mutex_lock(&rtc_lock);
|
||||||
|
|
|
@ -118,7 +118,7 @@ get_rtc_time(struct rtc_time *tm)
|
||||||
"information is no longer guaranteed!\n", PCF8563_NAME);
|
"information is no longer guaranteed!\n", PCF8563_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
tm->tm_year = BCD_TO_BIN(tm->tm_year) +
|
tm->tm_year = bcd2bin(tm->tm_year) +
|
||||||
((tm->tm_mon & 0x80) ? 100 : 0);
|
((tm->tm_mon & 0x80) ? 100 : 0);
|
||||||
tm->tm_sec &= 0x7F;
|
tm->tm_sec &= 0x7F;
|
||||||
tm->tm_min &= 0x7F;
|
tm->tm_min &= 0x7F;
|
||||||
|
@ -127,11 +127,11 @@ get_rtc_time(struct rtc_time *tm)
|
||||||
tm->tm_wday &= 0x07; /* Not coded in BCD. */
|
tm->tm_wday &= 0x07; /* Not coded in BCD. */
|
||||||
tm->tm_mon &= 0x1F;
|
tm->tm_mon &= 0x1F;
|
||||||
|
|
||||||
BCD_TO_BIN(tm->tm_sec);
|
tm->tm_sec = bcd2bin(tm->tm_sec);
|
||||||
BCD_TO_BIN(tm->tm_min);
|
tm->tm_min = bcd2bin(tm->tm_min);
|
||||||
BCD_TO_BIN(tm->tm_hour);
|
tm->tm_hour = bcd2bin(tm->tm_hour);
|
||||||
BCD_TO_BIN(tm->tm_mday);
|
tm->tm_mday = bcd2bin(tm->tm_mday);
|
||||||
BCD_TO_BIN(tm->tm_mon);
|
tm->tm_mon = bcd2bin(tm->tm_mon);
|
||||||
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
|
tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -279,12 +279,12 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
|
||||||
century = (tm.tm_year >= 2000) ? 0x80 : 0;
|
century = (tm.tm_year >= 2000) ? 0x80 : 0;
|
||||||
tm.tm_year = tm.tm_year % 100;
|
tm.tm_year = tm.tm_year % 100;
|
||||||
|
|
||||||
BIN_TO_BCD(tm.tm_year);
|
tm.tm_year = bin2bcd(tm.tm_year);
|
||||||
BIN_TO_BCD(tm.tm_mon);
|
tm.tm_mon = bin2bcd(tm.tm_mon);
|
||||||
BIN_TO_BCD(tm.tm_mday);
|
tm.tm_mday = bin2bcd(tm.tm_mday);
|
||||||
BIN_TO_BCD(tm.tm_hour);
|
tm.tm_hour = bin2bcd(tm.tm_hour);
|
||||||
BIN_TO_BCD(tm.tm_min);
|
tm.tm_min = bin2bcd(tm.tm_min);
|
||||||
BIN_TO_BCD(tm.tm_sec);
|
tm.tm_sec = bin2bcd(tm.tm_sec);
|
||||||
tm.tm_mon |= century;
|
tm.tm_mon |= century;
|
||||||
|
|
||||||
mutex_lock(&rtc_lock);
|
mutex_lock(&rtc_lock);
|
||||||
|
|
|
@ -127,7 +127,7 @@ int set_rtc_mmss(unsigned long nowtime)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
||||||
BCD_TO_BIN(cmos_minutes);
|
cmos_minutes = bcd2bin(cmos_minutes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* since we're only adjusting minutes and seconds,
|
* since we're only adjusting minutes and seconds,
|
||||||
|
@ -142,8 +142,8 @@ int set_rtc_mmss(unsigned long nowtime)
|
||||||
real_minutes %= 60;
|
real_minutes %= 60;
|
||||||
|
|
||||||
if (abs(real_minutes - cmos_minutes) < 30) {
|
if (abs(real_minutes - cmos_minutes) < 30) {
|
||||||
BIN_TO_BCD(real_seconds);
|
real_seconds = bin2bcd(real_seconds);
|
||||||
BIN_TO_BCD(real_minutes);
|
real_minutes = bin2bcd(real_minutes);
|
||||||
CMOS_WRITE(real_seconds,RTC_SECONDS);
|
CMOS_WRITE(real_seconds,RTC_SECONDS);
|
||||||
CMOS_WRITE(real_minutes,RTC_MINUTES);
|
CMOS_WRITE(real_minutes,RTC_MINUTES);
|
||||||
} else {
|
} else {
|
||||||
|
@ -170,12 +170,12 @@ get_cmos_time(void)
|
||||||
mon = CMOS_READ(RTC_MONTH);
|
mon = CMOS_READ(RTC_MONTH);
|
||||||
year = CMOS_READ(RTC_YEAR);
|
year = CMOS_READ(RTC_YEAR);
|
||||||
|
|
||||||
BCD_TO_BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
BCD_TO_BIN(min);
|
min = bcd2bin(min);
|
||||||
BCD_TO_BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
BCD_TO_BIN(day);
|
day = bcd2bin(day);
|
||||||
BCD_TO_BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
BCD_TO_BIN(year);
|
year = bcd2bin(year);
|
||||||
|
|
||||||
if ((year += 1900) < 1970)
|
if ((year += 1900) < 1970)
|
||||||
year += 100;
|
year += 100;
|
||||||
|
|
|
@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configuration"
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
|
|
||||||
menu "Fujitsu FR-V system setup"
|
menu "Fujitsu FR-V system setup"
|
||||||
|
|
||||||
|
|
|
@ -90,6 +90,8 @@ config HZ
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
source "arch/h8300/Kconfig.cpu"
|
source "arch/h8300/Kconfig.cpu"
|
||||||
|
|
||||||
menu "Executable file formats"
|
menu "Executable file formats"
|
||||||
|
|
|
@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
|
||||||
TIF_NEED_RESCHED */
|
TIF_NEED_RESCHED */
|
||||||
#define TIF_MEMDIE 4
|
#define TIF_MEMDIE 4
|
||||||
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
|
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
|
||||||
|
#define TIF_FREEZE 16 /* is freezing for suspend */
|
||||||
|
|
||||||
/* as above, but as bit values */
|
/* as above, but as bit values */
|
||||||
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
||||||
|
@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
|
||||||
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
||||||
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
||||||
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
|
||||||
|
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
||||||
|
|
||||||
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
|
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configuration"
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "Processor type and features"
|
menu "Processor type and features"
|
||||||
|
|
||||||
config IA64
|
config IA64
|
||||||
|
|
|
@ -2070,14 +2070,13 @@ sba_init(void)
|
||||||
if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
|
if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
#if defined(CONFIG_IA64_GENERIC) && defined(CONFIG_CRASH_DUMP) && \
|
#if defined(CONFIG_IA64_GENERIC)
|
||||||
defined(CONFIG_PROC_FS)
|
|
||||||
/* If we are booting a kdump kernel, the sba_iommu will
|
/* If we are booting a kdump kernel, the sba_iommu will
|
||||||
* cause devices that were not shutdown properly to MCA
|
* cause devices that were not shutdown properly to MCA
|
||||||
* as soon as they are turned back on. Our only option for
|
* as soon as they are turned back on. Our only option for
|
||||||
* a successful kdump kernel boot is to use the swiotlb.
|
* a successful kdump kernel boot is to use the swiotlb.
|
||||||
*/
|
*/
|
||||||
if (elfcorehdr_addr < ELFCORE_ADDR_MAX) {
|
if (is_kdump_kernel()) {
|
||||||
if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
|
if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
|
||||||
panic("Unable to initialize software I/O TLB:"
|
panic("Unable to initialize software I/O TLB:"
|
||||||
" Try machvec=dig boot option");
|
" Try machvec=dig boot option");
|
||||||
|
|
|
@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
|
||||||
enum pci_mmap_state mmap_state, int write_combine);
|
enum pci_mmap_state mmap_state, int write_combine);
|
||||||
#define HAVE_PCI_LEGACY
|
#define HAVE_PCI_LEGACY
|
||||||
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
|
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
|
||||||
struct vm_area_struct *vma);
|
struct vm_area_struct *vma,
|
||||||
extern ssize_t pci_read_legacy_io(struct kobject *kobj,
|
enum pci_mmap_state mmap_state);
|
||||||
struct bin_attribute *bin_attr,
|
|
||||||
char *buf, loff_t off, size_t count);
|
|
||||||
extern ssize_t pci_write_legacy_io(struct kobject *kobj,
|
|
||||||
struct bin_attribute *bin_attr,
|
|
||||||
char *buf, loff_t off, size_t count);
|
|
||||||
extern int pci_mmap_legacy_mem(struct kobject *kobj,
|
|
||||||
struct bin_attribute *attr,
|
|
||||||
struct vm_area_struct *vma);
|
|
||||||
|
|
||||||
#define pci_get_legacy_mem platform_pci_get_legacy_mem
|
#define pci_get_legacy_mem platform_pci_get_legacy_mem
|
||||||
#define pci_legacy_read platform_pci_legacy_read
|
#define pci_legacy_read platform_pci_legacy_read
|
||||||
|
|
|
@ -8,10 +8,14 @@
|
||||||
|
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
#include <linux/crash_dump.h>
|
||||||
|
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
|
|
||||||
|
/* Stores the physical address of elf header of crash image. */
|
||||||
|
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* copy_oldmem_page - copy one page from "oldmem"
|
* copy_oldmem_page - copy one page from "oldmem"
|
||||||
* @pfn: page frame number to be copied
|
* @pfn: page frame number to be copied
|
||||||
|
|
|
@ -1335,7 +1335,7 @@ kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_VMCORE
|
#ifdef CONFIG_CRASH_DUMP
|
||||||
/* find the size of the descriptor located at a certain address */
|
/* find the size of the descriptor located at a certain address */
|
||||||
unsigned long __init
|
unsigned long __init
|
||||||
vmcore_find_descriptor_size (unsigned long address)
|
vmcore_find_descriptor_size (unsigned long address)
|
||||||
|
|
|
@ -352,7 +352,7 @@ reserve_memory (void)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_VMCORE
|
#ifdef CONFIG_CRASH_KERNEL
|
||||||
if (reserve_elfcorehdr(&rsvd_region[n].start,
|
if (reserve_elfcorehdr(&rsvd_region[n].start,
|
||||||
&rsvd_region[n].end) == 0)
|
&rsvd_region[n].end) == 0)
|
||||||
n++;
|
n++;
|
||||||
|
@ -478,7 +478,12 @@ static __init int setup_nomca(char *s)
|
||||||
}
|
}
|
||||||
early_param("nomca", setup_nomca);
|
early_param("nomca", setup_nomca);
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_VMCORE
|
/*
|
||||||
|
* Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
|
||||||
|
* is_kdump_kernel() to determine if we are booting after a panic. Hence
|
||||||
|
* ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_CRASH_DUMP
|
||||||
/* elfcorehdr= specifies the location of elf core header
|
/* elfcorehdr= specifies the location of elf core header
|
||||||
* stored by the crashed kernel.
|
* stored by the crashed kernel.
|
||||||
*/
|
*/
|
||||||
|
@ -502,11 +507,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
|
||||||
* to work properly.
|
* to work properly.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
|
if (!is_vmcore_usable())
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
|
if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
|
||||||
elfcorehdr_addr = ELFCORE_ADDR_MAX;
|
vmcore_unusable();
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -700,23 +700,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
|
||||||
int remove_memory(u64 start, u64 size)
|
|
||||||
{
|
|
||||||
unsigned long start_pfn, end_pfn;
|
|
||||||
unsigned long timeout = 120 * HZ;
|
|
||||||
int ret;
|
|
||||||
start_pfn = start >> PAGE_SHIFT;
|
|
||||||
end_pfn = start_pfn + (size >> PAGE_SHIFT);
|
|
||||||
ret = offline_pages(start_pfn, end_pfn, timeout);
|
|
||||||
if (ret)
|
|
||||||
goto out;
|
|
||||||
/* we can free mem_map at this point */
|
|
||||||
out:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(remove_memory);
|
|
||||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
|
||||||
* vector to get the base address.
|
* vector to get the base address.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
|
pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma,
|
||||||
|
enum pci_mmap_state mmap_state)
|
||||||
{
|
{
|
||||||
unsigned long size = vma->vm_end - vma->vm_start;
|
unsigned long size = vma->vm_end - vma->vm_start;
|
||||||
pgprot_t prot;
|
pgprot_t prot;
|
||||||
char *addr;
|
char *addr;
|
||||||
|
|
||||||
|
/* We only support mmap'ing of legacy memory space */
|
||||||
|
if (mmap_state != pci_mmap_mem)
|
||||||
|
return -ENOSYS;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
|
* Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
|
||||||
* for more details.
|
* for more details.
|
||||||
|
|
|
@ -42,6 +42,8 @@ config HZ
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
|
|
||||||
menu "Processor type and features"
|
menu "Processor type and features"
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
#include <linux/cpu.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
|
|
|
@ -62,6 +62,8 @@ mainmenu "Linux/68k Kernel Configuration"
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "Platform dependent setup"
|
menu "Platform dependent setup"
|
||||||
|
|
||||||
config EISA
|
config EISA
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
#include <linux/poll.h>
|
#include <linux/poll.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
|
#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
|
||||||
#include <linux/smp_lock.h>
|
|
||||||
#include <linux/bcd.h>
|
#include <linux/bcd.h>
|
||||||
#include <asm/bvme6000hw.h>
|
#include <asm/bvme6000hw.h>
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,8 @@ config NO_IOPORT
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "Processor type and features"
|
menu "Processor type and features"
|
||||||
|
|
||||||
choice
|
choice
|
||||||
|
|
|
@ -84,12 +84,14 @@ static inline struct thread_info *current_thread_info(void)
|
||||||
#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
|
#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
|
||||||
TIF_NEED_RESCHED */
|
TIF_NEED_RESCHED */
|
||||||
#define TIF_MEMDIE 4
|
#define TIF_MEMDIE 4
|
||||||
|
#define TIF_FREEZE 16 /* is freezing for suspend */
|
||||||
|
|
||||||
/* as above, but as bit values */
|
/* as above, but as bit values */
|
||||||
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
||||||
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
||||||
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
||||||
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
||||||
|
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
||||||
|
|
||||||
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
|
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
|
||||||
|
|
||||||
|
|
|
@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
|
||||||
add initrd or initramfs image to the kernel image.
|
add initrd or initramfs image to the kernel image.
|
||||||
Otherwise, say N.
|
Otherwise, say N.
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
|
menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
|
||||||
|
|
||||||
config HW_HAS_EISA
|
config HW_HAS_EISA
|
||||||
|
|
|
@ -45,12 +45,12 @@ unsigned long read_persistent_clock(void)
|
||||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||||
|
|
||||||
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
sec = BCD2BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
min = BCD2BIN(min);
|
min = bcd2bin(min);
|
||||||
hour = BCD2BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
day = BCD2BIN(day);
|
day = bcd2bin(day);
|
||||||
mon = BCD2BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
year = BCD2BIN(year);
|
year = bcd2bin(year);
|
||||||
}
|
}
|
||||||
|
|
||||||
year += real_year - 72 + 2000;
|
year += real_year - 72 + 2000;
|
||||||
|
@ -83,7 +83,7 @@ int rtc_mips_set_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
||||||
cmos_minutes = BCD2BIN(cmos_minutes);
|
cmos_minutes = bcd2bin(cmos_minutes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* since we're only adjusting minutes and seconds,
|
* since we're only adjusting minutes and seconds,
|
||||||
|
@ -99,8 +99,8 @@ int rtc_mips_set_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
if (abs(real_minutes - cmos_minutes) < 30) {
|
if (abs(real_minutes - cmos_minutes) < 30) {
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
real_seconds = BIN2BCD(real_seconds);
|
real_seconds = bin2bcd(real_seconds);
|
||||||
real_minutes = BIN2BCD(real_minutes);
|
real_minutes = bin2bcd(real_minutes);
|
||||||
}
|
}
|
||||||
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
||||||
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
||||||
|
|
|
@ -44,7 +44,7 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
||||||
BCD_TO_BIN(cmos_minutes);
|
cmos_minutes = bcd2bin(cmos_minutes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* since we're only adjusting minutes and seconds,
|
* since we're only adjusting minutes and seconds,
|
||||||
|
@ -60,8 +60,8 @@ static inline int mc146818_set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
if (abs(real_minutes - cmos_minutes) < 30) {
|
if (abs(real_minutes - cmos_minutes) < 30) {
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
BIN_TO_BCD(real_seconds);
|
real_seconds = bin2bcd(real_seconds);
|
||||||
BIN_TO_BCD(real_minutes);
|
real_minutes = bin2bcd(real_minutes);
|
||||||
}
|
}
|
||||||
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
||||||
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
||||||
|
@ -103,12 +103,12 @@ static inline unsigned long mc146818_get_cmos_time(void)
|
||||||
} while (sec != CMOS_READ(RTC_SECONDS));
|
} while (sec != CMOS_READ(RTC_SECONDS));
|
||||||
|
|
||||||
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
BCD_TO_BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
BCD_TO_BIN(min);
|
min = bcd2bin(min);
|
||||||
BCD_TO_BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
BCD_TO_BIN(day);
|
day = bcd2bin(day);
|
||||||
BCD_TO_BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
BCD_TO_BIN(year);
|
year = bcd2bin(year);
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||||
year = mc146818_decode_year(year);
|
year = mc146818_decode_year(year);
|
||||||
|
|
|
@ -79,14 +79,14 @@ unsigned long read_persistent_clock(void)
|
||||||
/* Stop the update to the time */
|
/* Stop the update to the time */
|
||||||
m48t37_base->control = 0x40;
|
m48t37_base->control = 0x40;
|
||||||
|
|
||||||
year = BCD2BIN(m48t37_base->year);
|
year = bcd2bin(m48t37_base->year);
|
||||||
year += BCD2BIN(m48t37_base->century) * 100;
|
year += bcd2bin(m48t37_base->century) * 100;
|
||||||
|
|
||||||
month = BCD2BIN(m48t37_base->month);
|
month = bcd2bin(m48t37_base->month);
|
||||||
day = BCD2BIN(m48t37_base->date);
|
day = bcd2bin(m48t37_base->date);
|
||||||
hour = BCD2BIN(m48t37_base->hour);
|
hour = bcd2bin(m48t37_base->hour);
|
||||||
min = BCD2BIN(m48t37_base->min);
|
min = bcd2bin(m48t37_base->min);
|
||||||
sec = BCD2BIN(m48t37_base->sec);
|
sec = bcd2bin(m48t37_base->sec);
|
||||||
|
|
||||||
/* Start the update to the time again */
|
/* Start the update to the time again */
|
||||||
m48t37_base->control = 0x00;
|
m48t37_base->control = 0x00;
|
||||||
|
@ -113,22 +113,22 @@ int rtc_mips_set_time(unsigned long tim)
|
||||||
m48t37_base->control = 0x80;
|
m48t37_base->control = 0x80;
|
||||||
|
|
||||||
/* year */
|
/* year */
|
||||||
m48t37_base->year = BIN2BCD(tm.tm_year % 100);
|
m48t37_base->year = bin2bcd(tm.tm_year % 100);
|
||||||
m48t37_base->century = BIN2BCD(tm.tm_year / 100);
|
m48t37_base->century = bin2bcd(tm.tm_year / 100);
|
||||||
|
|
||||||
/* month */
|
/* month */
|
||||||
m48t37_base->month = BIN2BCD(tm.tm_mon);
|
m48t37_base->month = bin2bcd(tm.tm_mon);
|
||||||
|
|
||||||
/* day */
|
/* day */
|
||||||
m48t37_base->date = BIN2BCD(tm.tm_mday);
|
m48t37_base->date = bin2bcd(tm.tm_mday);
|
||||||
|
|
||||||
/* hour/min/sec */
|
/* hour/min/sec */
|
||||||
m48t37_base->hour = BIN2BCD(tm.tm_hour);
|
m48t37_base->hour = bin2bcd(tm.tm_hour);
|
||||||
m48t37_base->min = BIN2BCD(tm.tm_min);
|
m48t37_base->min = bin2bcd(tm.tm_min);
|
||||||
m48t37_base->sec = BIN2BCD(tm.tm_sec);
|
m48t37_base->sec = bin2bcd(tm.tm_sec);
|
||||||
|
|
||||||
/* day of week -- not really used, but let's keep it up-to-date */
|
/* day of week -- not really used, but let's keep it up-to-date */
|
||||||
m48t37_base->day = BIN2BCD(tm.tm_wday + 1);
|
m48t37_base->day = bin2bcd(tm.tm_wday + 1);
|
||||||
|
|
||||||
/* disable writing */
|
/* disable writing */
|
||||||
m48t37_base->control = 0x00;
|
m48t37_base->control = 0x00;
|
||||||
|
|
|
@ -156,32 +156,32 @@ int m41t81_set_time(unsigned long t)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
spin_lock_irqsave(&rtc_lock, flags);
|
spin_lock_irqsave(&rtc_lock, flags);
|
||||||
tm.tm_sec = BIN2BCD(tm.tm_sec);
|
tm.tm_sec = bin2bcd(tm.tm_sec);
|
||||||
m41t81_write(M41T81REG_SC, tm.tm_sec);
|
m41t81_write(M41T81REG_SC, tm.tm_sec);
|
||||||
|
|
||||||
tm.tm_min = BIN2BCD(tm.tm_min);
|
tm.tm_min = bin2bcd(tm.tm_min);
|
||||||
m41t81_write(M41T81REG_MN, tm.tm_min);
|
m41t81_write(M41T81REG_MN, tm.tm_min);
|
||||||
|
|
||||||
tm.tm_hour = BIN2BCD(tm.tm_hour);
|
tm.tm_hour = bin2bcd(tm.tm_hour);
|
||||||
tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
|
tm.tm_hour = (tm.tm_hour & 0x3f) | (m41t81_read(M41T81REG_HR) & 0xc0);
|
||||||
m41t81_write(M41T81REG_HR, tm.tm_hour);
|
m41t81_write(M41T81REG_HR, tm.tm_hour);
|
||||||
|
|
||||||
/* tm_wday starts from 0 to 6 */
|
/* tm_wday starts from 0 to 6 */
|
||||||
if (tm.tm_wday == 0) tm.tm_wday = 7;
|
if (tm.tm_wday == 0) tm.tm_wday = 7;
|
||||||
tm.tm_wday = BIN2BCD(tm.tm_wday);
|
tm.tm_wday = bin2bcd(tm.tm_wday);
|
||||||
m41t81_write(M41T81REG_DY, tm.tm_wday);
|
m41t81_write(M41T81REG_DY, tm.tm_wday);
|
||||||
|
|
||||||
tm.tm_mday = BIN2BCD(tm.tm_mday);
|
tm.tm_mday = bin2bcd(tm.tm_mday);
|
||||||
m41t81_write(M41T81REG_DT, tm.tm_mday);
|
m41t81_write(M41T81REG_DT, tm.tm_mday);
|
||||||
|
|
||||||
/* tm_mon starts from 0, *ick* */
|
/* tm_mon starts from 0, *ick* */
|
||||||
tm.tm_mon ++;
|
tm.tm_mon ++;
|
||||||
tm.tm_mon = BIN2BCD(tm.tm_mon);
|
tm.tm_mon = bin2bcd(tm.tm_mon);
|
||||||
m41t81_write(M41T81REG_MO, tm.tm_mon);
|
m41t81_write(M41T81REG_MO, tm.tm_mon);
|
||||||
|
|
||||||
/* we don't do century, everything is beyond 2000 */
|
/* we don't do century, everything is beyond 2000 */
|
||||||
tm.tm_year %= 100;
|
tm.tm_year %= 100;
|
||||||
tm.tm_year = BIN2BCD(tm.tm_year);
|
tm.tm_year = bin2bcd(tm.tm_year);
|
||||||
m41t81_write(M41T81REG_YR, tm.tm_year);
|
m41t81_write(M41T81REG_YR, tm.tm_year);
|
||||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||||
|
|
||||||
|
@ -209,12 +209,12 @@ unsigned long m41t81_get_time(void)
|
||||||
year = m41t81_read(M41T81REG_YR);
|
year = m41t81_read(M41T81REG_YR);
|
||||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||||
|
|
||||||
sec = BCD2BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
min = BCD2BIN(min);
|
min = bcd2bin(min);
|
||||||
hour = BCD2BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
day = BCD2BIN(day);
|
day = bcd2bin(day);
|
||||||
mon = BCD2BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
year = BCD2BIN(year);
|
year = bcd2bin(year);
|
||||||
|
|
||||||
year += 2000;
|
year += 2000;
|
||||||
|
|
||||||
|
|
|
@ -124,18 +124,18 @@ int xicor_set_time(unsigned long t)
|
||||||
xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
|
xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
|
||||||
|
|
||||||
/* trivial ones */
|
/* trivial ones */
|
||||||
tm.tm_sec = BIN2BCD(tm.tm_sec);
|
tm.tm_sec = bin2bcd(tm.tm_sec);
|
||||||
xicor_write(X1241REG_SC, tm.tm_sec);
|
xicor_write(X1241REG_SC, tm.tm_sec);
|
||||||
|
|
||||||
tm.tm_min = BIN2BCD(tm.tm_min);
|
tm.tm_min = bin2bcd(tm.tm_min);
|
||||||
xicor_write(X1241REG_MN, tm.tm_min);
|
xicor_write(X1241REG_MN, tm.tm_min);
|
||||||
|
|
||||||
tm.tm_mday = BIN2BCD(tm.tm_mday);
|
tm.tm_mday = bin2bcd(tm.tm_mday);
|
||||||
xicor_write(X1241REG_DT, tm.tm_mday);
|
xicor_write(X1241REG_DT, tm.tm_mday);
|
||||||
|
|
||||||
/* tm_mon starts from 0, *ick* */
|
/* tm_mon starts from 0, *ick* */
|
||||||
tm.tm_mon ++;
|
tm.tm_mon ++;
|
||||||
tm.tm_mon = BIN2BCD(tm.tm_mon);
|
tm.tm_mon = bin2bcd(tm.tm_mon);
|
||||||
xicor_write(X1241REG_MO, tm.tm_mon);
|
xicor_write(X1241REG_MO, tm.tm_mon);
|
||||||
|
|
||||||
/* year is split */
|
/* year is split */
|
||||||
|
@ -148,7 +148,7 @@ int xicor_set_time(unsigned long t)
|
||||||
tmp = xicor_read(X1241REG_HR);
|
tmp = xicor_read(X1241REG_HR);
|
||||||
if (tmp & X1241REG_HR_MIL) {
|
if (tmp & X1241REG_HR_MIL) {
|
||||||
/* 24 hour format */
|
/* 24 hour format */
|
||||||
tm.tm_hour = BIN2BCD(tm.tm_hour);
|
tm.tm_hour = bin2bcd(tm.tm_hour);
|
||||||
tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
|
tmp = (tmp & ~0x3f) | (tm.tm_hour & 0x3f);
|
||||||
} else {
|
} else {
|
||||||
/* 12 hour format, with 0x2 for pm */
|
/* 12 hour format, with 0x2 for pm */
|
||||||
|
@ -157,7 +157,7 @@ int xicor_set_time(unsigned long t)
|
||||||
tmp |= 0x20;
|
tmp |= 0x20;
|
||||||
tm.tm_hour -= 12;
|
tm.tm_hour -= 12;
|
||||||
}
|
}
|
||||||
tm.tm_hour = BIN2BCD(tm.tm_hour);
|
tm.tm_hour = bin2bcd(tm.tm_hour);
|
||||||
tmp |= tm.tm_hour;
|
tmp |= tm.tm_hour;
|
||||||
}
|
}
|
||||||
xicor_write(X1241REG_HR, tmp);
|
xicor_write(X1241REG_HR, tmp);
|
||||||
|
@ -191,13 +191,13 @@ unsigned long xicor_get_time(void)
|
||||||
y2k = xicor_read(X1241REG_Y2K);
|
y2k = xicor_read(X1241REG_Y2K);
|
||||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||||
|
|
||||||
sec = BCD2BIN(sec);
|
sec = bcd2bin(sec);
|
||||||
min = BCD2BIN(min);
|
min = bcd2bin(min);
|
||||||
hour = BCD2BIN(hour);
|
hour = bcd2bin(hour);
|
||||||
day = BCD2BIN(day);
|
day = bcd2bin(day);
|
||||||
mon = BCD2BIN(mon);
|
mon = bcd2bin(mon);
|
||||||
year = BCD2BIN(year);
|
year = bcd2bin(year);
|
||||||
y2k = BCD2BIN(y2k);
|
y2k = bcd2bin(y2k);
|
||||||
|
|
||||||
year += (y2k * 100);
|
year += (y2k * 100);
|
||||||
|
|
||||||
|
|
|
@ -68,6 +68,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel Configuration"
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
|
|
||||||
menu "Matsushita MN10300 system setup"
|
menu "Matsushita MN10300 system setup"
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ static int set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
cmos_minutes = CMOS_READ(RTC_MINUTES);
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
|
||||||
BCD_TO_BIN(cmos_minutes);
|
cmos_minutes = bcd2bin(cmos_minutes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* since we're only adjusting minutes and seconds,
|
* since we're only adjusting minutes and seconds,
|
||||||
|
@ -84,8 +84,8 @@ static int set_rtc_mmss(unsigned long nowtime)
|
||||||
|
|
||||||
if (abs(real_minutes - cmos_minutes) < 30) {
|
if (abs(real_minutes - cmos_minutes) < 30) {
|
||||||
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
|
||||||
BIN_TO_BCD(real_seconds);
|
real_seconds = bin2bcd(real_seconds);
|
||||||
BIN_TO_BCD(real_minutes);
|
real_minutes = bin2bcd(real_minutes);
|
||||||
}
|
}
|
||||||
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
CMOS_WRITE(real_seconds, RTC_SECONDS);
|
||||||
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
CMOS_WRITE(real_minutes, RTC_MINUTES);
|
||||||
|
|
|
@ -9,6 +9,8 @@ config PARISC
|
||||||
def_bool y
|
def_bool y
|
||||||
select HAVE_IDE
|
select HAVE_IDE
|
||||||
select HAVE_OPROFILE
|
select HAVE_OPROFILE
|
||||||
|
select RTC_CLASS
|
||||||
|
select RTC_DRV_PARISC
|
||||||
help
|
help
|
||||||
The PA-RISC microprocessor is designed by Hewlett-Packard and used
|
The PA-RISC microprocessor is designed by Hewlett-Packard and used
|
||||||
in many of their workstations & servers (HP9000 700 and 800 series,
|
in many of their workstations & servers (HP9000 700 and 800 series,
|
||||||
|
@ -90,6 +92,8 @@ config ARCH_MAY_HAVE_PC_FDC
|
||||||
|
|
||||||
source "init/Kconfig"
|
source "init/Kconfig"
|
||||||
|
|
||||||
|
source "kernel/Kconfig.freezer"
|
||||||
|
|
||||||
|
|
||||||
menu "Processor type and features"
|
menu "Processor type and features"
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue