Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
Pull sparc updates from David Miller: "Here we go: - Fix various long standing issues in the sparc 32-bit IOMMU support code, from Christoph Hellwig. - Various other code cleanups and simplifications all over. From Gustavo A. R. Silva, Jagadeesh Pagadala, Masahiro Yamada, Mauro Carvalho Chehab, Mike Rapoport" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc: sparc64: simplify reduce_memory() function sparc: use struct_size() in kzalloc() docs: sparc: convert to ReST sparc/iommu: merge iommu_get_one and __sbus_iommu_map_page sparc/iommu: use __sbus_iommu_map_page to implement the map_sg path sparc/iommu: fix __sbus_iommu_map_page for highmem pages sparc/iommu: move per-page flushing into __sbus_iommu_map_page sparc/iommu: pass a physical address to iommu_get_one sparc/iommu: create a common helper for map_sg sparc/iommu: merge iommu_release_one and sbus_iommu_unmap_page sparc/iommu: use sbus_iommu_unmap_page in sbus_iommu_unmap_sg sparc/iommu: use !PageHighMem to check if a page has a kernel mapping sparc: vdso: add FORCE to the build rule of %.so arch:sparc:kernel/uprobes.c : Remove duplicate header
This commit is contained in:
commit
9b6c9e96f9
|
@ -1,3 +1,4 @@
|
|||
================================
|
||||
Application Data Integrity (ADI)
|
||||
================================
|
||||
|
||||
|
@ -44,12 +45,15 @@ provided by the hypervisor to the kernel. Kernel returns the value of
|
|||
ADI block size to userspace using auxiliary vector along with other ADI
|
||||
info. Following auxiliary vectors are provided by the kernel:
|
||||
|
||||
============ ===========================================
|
||||
AT_ADI_BLKSZ ADI block size. This is the granularity and
|
||||
alignment, in bytes, of ADI versioning.
|
||||
AT_ADI_NBITS Number of ADI version bits in the VA
|
||||
============ ===========================================
|
||||
|
||||
|
||||
IMPORTANT NOTES:
|
||||
IMPORTANT NOTES
|
||||
===============
|
||||
|
||||
- Version tag values of 0x0 and 0xf are reserved. These values match any
|
||||
tag in virtual address and never generate a mismatch exception.
|
||||
|
@ -86,11 +90,12 @@ IMPORTANT NOTES:
|
|||
|
||||
|
||||
ADI related traps
|
||||
-----------------
|
||||
=================
|
||||
|
||||
With ADI enabled, following new traps may occur:
|
||||
|
||||
Disrupting memory corruption
|
||||
----------------------------
|
||||
|
||||
When a store accesses a memory location that has TTE.mcd=1,
|
||||
the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
|
||||
|
@ -100,7 +105,7 @@ Disrupting memory corruption
|
|||
first. Hypervisor creates a sun4v error report and sends a
|
||||
resumable error (TT=0x7e) trap to the kernel. The kernel sends
|
||||
a SIGSEGV to the task that resulted in this trap with the following
|
||||
info:
|
||||
info::
|
||||
|
||||
siginfo.si_signo = SIGSEGV;
|
||||
siginfo.errno = 0;
|
||||
|
@ -110,6 +115,7 @@ Disrupting memory corruption
|
|||
|
||||
|
||||
Precise memory corruption
|
||||
-------------------------
|
||||
|
||||
When a store accesses a memory location that has TTE.mcd=1,
|
||||
the task is running with ADI enabled (PSTATE.mcde=1), and the ADI
|
||||
|
@ -118,7 +124,7 @@ Precise memory corruption
|
|||
MCD precise exception is enabled (MCDPERR=1), a precise
|
||||
exception is sent to the kernel with TT=0x1a. The kernel sends
|
||||
a SIGSEGV to the task that resulted in this trap with the following
|
||||
info:
|
||||
info::
|
||||
|
||||
siginfo.si_signo = SIGSEGV;
|
||||
siginfo.errno = 0;
|
||||
|
@ -126,17 +132,19 @@ Precise memory corruption
|
|||
siginfo.si_addr = addr; /* address that caused trap */
|
||||
siginfo.si_trapno = 0;
|
||||
|
||||
NOTE: ADI tag mismatch on a load always results in precise trap.
|
||||
NOTE:
|
||||
ADI tag mismatch on a load always results in precise trap.
|
||||
|
||||
|
||||
MCD disabled
|
||||
------------
|
||||
|
||||
When a task has not enabled ADI and attempts to set ADI version
|
||||
on a memory address, processor sends an MCD disabled trap. This
|
||||
trap is handled by hypervisor first and the hypervisor vectors this
|
||||
trap through to the kernel as Data Access Exception trap with
|
||||
fault type set to 0xa (invalid ASI). When this occurs, the kernel
|
||||
sends the task SIGSEGV signal with following info:
|
||||
sends the task SIGSEGV signal with following info::
|
||||
|
||||
siginfo.si_signo = SIGSEGV;
|
||||
siginfo.errno = 0;
|
||||
|
@ -149,35 +157,35 @@ Sample program to use ADI
|
|||
-------------------------
|
||||
|
||||
Following sample program is meant to illustrate how to use the ADI
|
||||
functionality.
|
||||
functionality::
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <elf.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/mman.h>
|
||||
#include <asm/asi.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <elf.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/mman.h>
|
||||
#include <asm/asi.h>
|
||||
|
||||
#ifndef AT_ADI_BLKSZ
|
||||
#define AT_ADI_BLKSZ 48
|
||||
#endif
|
||||
#ifndef AT_ADI_NBITS
|
||||
#define AT_ADI_NBITS 49
|
||||
#endif
|
||||
#ifndef AT_ADI_BLKSZ
|
||||
#define AT_ADI_BLKSZ 48
|
||||
#endif
|
||||
#ifndef AT_ADI_NBITS
|
||||
#define AT_ADI_NBITS 49
|
||||
#endif
|
||||
|
||||
#ifndef PROT_ADI
|
||||
#define PROT_ADI 0x10
|
||||
#endif
|
||||
#ifndef PROT_ADI
|
||||
#define PROT_ADI 0x10
|
||||
#endif
|
||||
|
||||
#define BUFFER_SIZE 32*1024*1024UL
|
||||
#define BUFFER_SIZE 32*1024*1024UL
|
||||
|
||||
main(int argc, char* argv[], char* envp[])
|
||||
{
|
||||
unsigned long i, mcde, adi_blksz, adi_nbits;
|
||||
char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr;
|
||||
int shmid, version;
|
||||
main(int argc, char* argv[], char* envp[])
|
||||
{
|
||||
unsigned long i, mcde, adi_blksz, adi_nbits;
|
||||
char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr;
|
||||
int shmid, version;
|
||||
Elf64_auxv_t *auxv;
|
||||
|
||||
adi_blksz = 0;
|
||||
|
@ -202,77 +210,77 @@ main(int argc, char* argv[], char* envp[])
|
|||
printf("\tBlock size = %ld\n", adi_blksz);
|
||||
printf("\tNumber of bits = %ld\n", adi_nbits);
|
||||
|
||||
if ((shmid = shmget(2, BUFFER_SIZE,
|
||||
IPC_CREAT | SHM_R | SHM_W)) < 0) {
|
||||
perror("shmget failed");
|
||||
exit(1);
|
||||
}
|
||||
if ((shmid = shmget(2, BUFFER_SIZE,
|
||||
IPC_CREAT | SHM_R | SHM_W)) < 0) {
|
||||
perror("shmget failed");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmaddr = shmat(shmid, NULL, 0);
|
||||
if (shmaddr == (char *)-1) {
|
||||
perror("shm attach failed");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
exit(1);
|
||||
}
|
||||
shmaddr = shmat(shmid, NULL, 0);
|
||||
if (shmaddr == (char *)-1) {
|
||||
perror("shm attach failed");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) {
|
||||
perror("mprotect failed");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
/* Set the ADI version tag on the shm segment
|
||||
*/
|
||||
version = 10;
|
||||
tmp_addr = shmaddr;
|
||||
end = shmaddr + BUFFER_SIZE;
|
||||
while (tmp_addr < end) {
|
||||
asm volatile(
|
||||
"stxa %1, [%0]0x90\n\t"
|
||||
:
|
||||
: "r" (tmp_addr), "r" (version));
|
||||
tmp_addr += adi_blksz;
|
||||
}
|
||||
/* Set the ADI version tag on the shm segment
|
||||
*/
|
||||
version = 10;
|
||||
tmp_addr = shmaddr;
|
||||
end = shmaddr + BUFFER_SIZE;
|
||||
while (tmp_addr < end) {
|
||||
asm volatile(
|
||||
"stxa %1, [%0]0x90\n\t"
|
||||
:
|
||||
: "r" (tmp_addr), "r" (version));
|
||||
tmp_addr += adi_blksz;
|
||||
}
|
||||
asm volatile("membar #Sync\n\t");
|
||||
|
||||
/* Create a versioned address from the normal address by placing
|
||||
/* Create a versioned address from the normal address by placing
|
||||
* version tag in the upper adi_nbits bits
|
||||
*/
|
||||
tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits);
|
||||
tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits);
|
||||
veraddr = (void *) (((unsigned long)version << (64-adi_nbits))
|
||||
| (unsigned long)tmp_addr);
|
||||
*/
|
||||
tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits);
|
||||
tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits);
|
||||
veraddr = (void *) (((unsigned long)version << (64-adi_nbits))
|
||||
| (unsigned long)tmp_addr);
|
||||
|
||||
printf("Starting the writes:\n");
|
||||
for (i = 0; i < BUFFER_SIZE; i++) {
|
||||
veraddr[i] = (char)(i);
|
||||
if (!(i % (1024 * 1024)))
|
||||
printf(".");
|
||||
}
|
||||
printf("\n");
|
||||
printf("Starting the writes:\n");
|
||||
for (i = 0; i < BUFFER_SIZE; i++) {
|
||||
veraddr[i] = (char)(i);
|
||||
if (!(i % (1024 * 1024)))
|
||||
printf(".");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("Verifying data...");
|
||||
printf("Verifying data...");
|
||||
fflush(stdout);
|
||||
for (i = 0; i < BUFFER_SIZE; i++)
|
||||
if (veraddr[i] != (char)i)
|
||||
printf("\nIndex %lu mismatched\n", i);
|
||||
printf("Done.\n");
|
||||
for (i = 0; i < BUFFER_SIZE; i++)
|
||||
if (veraddr[i] != (char)i)
|
||||
printf("\nIndex %lu mismatched\n", i);
|
||||
printf("Done.\n");
|
||||
|
||||
/* Disable ADI and clean up
|
||||
*/
|
||||
/* Disable ADI and clean up
|
||||
*/
|
||||
if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) {
|
||||
perror("mprotect failed");
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
if (shmdt((const void *)shmaddr) != 0)
|
||||
perror("Detach failure");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
if (shmdt((const void *)shmaddr) != 0)
|
||||
perror("Detach failure");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
|
||||
exit(0);
|
||||
exit(0);
|
||||
|
||||
err_out:
|
||||
if (shmdt((const void *)shmaddr) != 0)
|
||||
perror("Detach failure");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
exit(1);
|
||||
}
|
||||
err_out:
|
||||
if (shmdt((const void *)shmaddr) != 0)
|
||||
perror("Detach failure");
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
exit(1);
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
Steps for sending 'break' on sunhv console:
|
||||
===========================================
|
||||
Steps for sending 'break' on sunhv console
|
||||
==========================================
|
||||
|
||||
On Baremetal:
|
||||
1. press Esc + 'B'
|
|
@ -0,0 +1,13 @@
|
|||
:orphan:
|
||||
|
||||
==================
|
||||
Sparc Architecture
|
||||
==================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
console
|
||||
adi
|
||||
|
||||
oradax/oracle-dax
|
|
@ -1,5 +1,6 @@
|
|||
=======================================
|
||||
Oracle Data Analytics Accelerator (DAX)
|
||||
---------------------------------------
|
||||
=======================================
|
||||
|
||||
DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
|
||||
(DAX2) processor chips, and has direct access to the CPU's L3 caches
|
||||
|
@ -17,6 +18,7 @@ code sufficient to write user or kernel applications that use DAX
|
|||
functionality.
|
||||
|
||||
The user library is open source and available at:
|
||||
|
||||
https://oss.oracle.com/git/gitweb.cgi?p=libdax.git
|
||||
|
||||
The Hypervisor interface to the coprocessor is described in detail in
|
||||
|
@ -26,7 +28,7 @@ Specification" version 3.0.20+15, dated 2017-09-25.
|
|||
|
||||
|
||||
High Level Overview
|
||||
-------------------
|
||||
===================
|
||||
|
||||
A coprocessor request is described by a Command Control Block
|
||||
(CCB). The CCB contains an opcode and various parameters. The opcode
|
||||
|
@ -52,7 +54,7 @@ thread.
|
|||
|
||||
|
||||
Addressing Memory
|
||||
-----------------
|
||||
=================
|
||||
|
||||
The kernel does not have access to physical memory in the Sun4v
|
||||
architecture, as there is an additional level of memory virtualization
|
||||
|
@ -77,7 +79,7 @@ the request.
|
|||
|
||||
|
||||
The Driver API
|
||||
--------------
|
||||
==============
|
||||
|
||||
An application makes requests to the driver via the write() system
|
||||
call, and gets results (if any) via read(). The completion areas are
|
||||
|
@ -108,6 +110,7 @@ equal to the number of bytes given in the call. Otherwise -1 is
|
|||
returned and errno is set.
|
||||
|
||||
CCB_DEQUEUE
|
||||
-----------
|
||||
|
||||
Tells the driver to clean up resources associated with past
|
||||
requests. Since no interrupt is generated upon the completion of a
|
||||
|
@ -116,12 +119,14 @@ further status information is returned, so the user should not
|
|||
subsequently call read().
|
||||
|
||||
CCB_KILL
|
||||
--------
|
||||
|
||||
Kills a CCB during execution. The CCB is guaranteed to not continue
|
||||
executing once this call returns successfully. On success, read() must
|
||||
be called to retrieve the result of the action.
|
||||
|
||||
CCB_INFO
|
||||
--------
|
||||
|
||||
Retrieves information about a currently executing CCB. Note that some
|
||||
Hypervisors might return 'notfound' when the CCB is in 'inprogress'
|
||||
|
@ -130,6 +135,7 @@ CCB_KILL must be invoked on that CCB. Upon success, read() must be
|
|||
called to retrieve the details of the action.
|
||||
|
||||
Submission of an array of CCBs for execution
|
||||
---------------------------------------------
|
||||
|
||||
A write() whose length is a multiple of the CCB size is treated as a
|
||||
submit operation. The file offset is treated as the index of the
|
||||
|
@ -146,6 +152,7 @@ status will reflect the error caused by the first CCB that was not
|
|||
accepted, and status_data will provide additional data in some cases.
|
||||
|
||||
MMAP
|
||||
----
|
||||
|
||||
The mmap() function provides access to the completion area allocated
|
||||
in the driver. Note that the completion area is not writeable by the
|
||||
|
@ -153,7 +160,7 @@ user process, and the mmap call must not specify PROT_WRITE.
|
|||
|
||||
|
||||
Completion of a Request
|
||||
-----------------------
|
||||
=======================
|
||||
|
||||
The first byte in each completion area is the command status which is
|
||||
updated by the coprocessor hardware. Software may take advantage of
|
||||
|
@ -172,7 +179,7 @@ and resumption of execution may be just a few nanoseconds.
|
|||
|
||||
|
||||
Application Life Cycle of a DAX Submission
|
||||
------------------------------------------
|
||||
==========================================
|
||||
|
||||
- open dax device
|
||||
- call mmap() to get the completion area address
|
||||
|
@ -187,7 +194,7 @@ Application Life Cycle of a DAX Submission
|
|||
|
||||
|
||||
Memory Constraints
|
||||
------------------
|
||||
==================
|
||||
|
||||
The DAX hardware operates only on physical addresses. Therefore, it is
|
||||
not aware of virtual memory mappings and the discontiguities that may
|
||||
|
@ -226,7 +233,7 @@ CCB Structure
|
|||
-------------
|
||||
A CCB is an array of 8 64-bit words. Several of these words provide
|
||||
command opcodes, parameters, flags, etc., and the rest are addresses
|
||||
for the completion area, output buffer, and various inputs:
|
||||
for the completion area, output buffer, and various inputs::
|
||||
|
||||
struct ccb {
|
||||
u64 control;
|
||||
|
@ -252,7 +259,7 @@ The first word (control) is examined by the driver for the following:
|
|||
|
||||
|
||||
Example Code
|
||||
------------
|
||||
============
|
||||
|
||||
The DAX is accessible to both user and kernel code. The kernel code
|
||||
can make hypercalls directly while the user code must use wrappers
|
||||
|
@ -265,7 +272,7 @@ arch/sparc/include/uapi/asm/oradax.h must be included.
|
|||
|
||||
First, the proper device must be opened. For M7 it will be
|
||||
/dev/oradax1 and for M8 it will be /dev/oradax2. The simplest
|
||||
procedure is to attempt to open both, as only one will succeed:
|
||||
procedure is to attempt to open both, as only one will succeed::
|
||||
|
||||
fd = open("/dev/oradax1", O_RDWR);
|
||||
if (fd < 0)
|
||||
|
@ -273,7 +280,7 @@ procedure is to attempt to open both, as only one will succeed:
|
|||
if (fd < 0)
|
||||
/* No DAX found */
|
||||
|
||||
Next, the completion area must be mapped:
|
||||
Next, the completion area must be mapped::
|
||||
|
||||
completion_area = mmap(NULL, DAX_MMAP_LEN, PROT_READ, MAP_SHARED, fd, 0);
|
||||
|
||||
|
@ -295,7 +302,7 @@ is the input bitmap inverted.
|
|||
|
||||
For details of all the parameters and bits used in this CCB, please
|
||||
refer to section 36.2.1.3 of the DAX Hypervisor API document, which
|
||||
describes the Scan command in detail.
|
||||
describes the Scan command in detail::
|
||||
|
||||
ccb->control = /* Table 36.1, CCB Header Format */
|
||||
(2L << 48) /* command = Scan Value */
|
||||
|
@ -326,7 +333,7 @@ describes the Scan command in detail.
|
|||
|
||||
The CCB submission is a write() or pwrite() system call to the
|
||||
driver. If the call fails, then a read() must be used to retrieve the
|
||||
status:
|
||||
status::
|
||||
|
||||
if (pwrite(fd, ccb, 64, 0) != 64) {
|
||||
struct ccb_exec_result status;
|
||||
|
@ -337,7 +344,7 @@ status:
|
|||
After a successful submission of the CCB, the completion area may be
|
||||
polled to determine when the DAX is finished. Detailed information on
|
||||
the contents of the completion area can be found in section 36.2.2 of
|
||||
the DAX HV API document.
|
||||
the DAX HV API document::
|
||||
|
||||
while (1) {
|
||||
/* Monitored Load */
|
||||
|
@ -355,7 +362,7 @@ the DAX HV API document.
|
|||
A completion area status of 1 indicates successful completion of the
|
||||
CCB and validity of the output bitmap, which may be used immediately.
|
||||
All other non-zero values indicate error conditions which are
|
||||
described in section 36.2.2.
|
||||
described in section 36.2.2::
|
||||
|
||||
if (completion_area[0] != 1) { /* section 36.2.2, 1 = command ran and succeeded */
|
||||
/* completion_area[0] contains the completion status */
|
||||
|
@ -364,7 +371,7 @@ described in section 36.2.2.
|
|||
|
||||
After the completion area has been processed, the driver must be
|
||||
notified that it can release any resources associated with the
|
||||
request. This is done via the dequeue operation:
|
||||
request. This is done via the dequeue operation::
|
||||
|
||||
struct dax_command cmd;
|
||||
cmd.command = CCB_DEQUEUE;
|
||||
|
@ -375,13 +382,14 @@ request. This is done via the dequeue operation:
|
|||
Finally, normal program cleanup should be done, i.e., unmapping
|
||||
completion area, closing the dax device, freeing memory etc.
|
||||
|
||||
[Kernel example]
|
||||
Kernel example
|
||||
--------------
|
||||
|
||||
The only difference in using the DAX in kernel code is the treatment
|
||||
of the completion area. Unlike user applications which mmap the
|
||||
completion area allocated by the driver, kernel code must allocate its
|
||||
own memory to use for the completion area, and this address and its
|
||||
type must be given in the CCB:
|
||||
type must be given in the CCB::
|
||||
|
||||
ccb->control |= /* Table 36.1, CCB Header Format */
|
||||
(3L << 32); /* completion area address type = primary virtual */
|
||||
|
@ -389,9 +397,11 @@ type must be given in the CCB:
|
|||
ccb->completion = (unsigned long) completion_area; /* Completion area address */
|
||||
|
||||
The dax submit hypercall is made directly. The flags used in the
|
||||
ccb_submit call are documented in the DAX HV API in section 36.3.1.
|
||||
ccb_submit call are documented in the DAX HV API in section 36.3.1.
|
||||
|
||||
#include <asm/hypervisor.h>
|
||||
::
|
||||
|
||||
#include <asm/hypervisor.h>
|
||||
|
||||
hv_rv = sun4v_ccb_submit((unsigned long)ccb, 64,
|
||||
HV_CCB_QUERY_CMD |
|
||||
|
@ -405,7 +415,7 @@ ccb_submit call are documented in the DAX HV API in section 36.3.1.
|
|||
}
|
||||
|
||||
After the submission, the completion area polling code is identical to
|
||||
that in user land:
|
||||
that in user land::
|
||||
|
||||
while (1) {
|
||||
/* Monitored Load */
|
||||
|
@ -427,3 +437,9 @@ that in user land:
|
|||
|
||||
The output bitmap is ready for consumption immediately after the
|
||||
completion status indicates success.
|
||||
|
||||
Excerpt from UltraSPARC Virtual Machine Specification
|
||||
=====================================================
|
||||
|
||||
.. include:: dax-hv-api.txt
|
||||
:literal:
|
|
@ -194,8 +194,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
|
|||
|
||||
n = enumerate_cpuinfo_nodes(tmp_level);
|
||||
|
||||
new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
|
||||
(sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
|
||||
new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC);
|
||||
if (!new_tree)
|
||||
return NULL;
|
||||
|
||||
|
|
|
@ -29,7 +29,6 @@
|
|||
#include <linux/kdebug.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
/* Compute the address of the breakpoint instruction and return it.
|
||||
*
|
||||
|
|
|
@ -2269,19 +2269,6 @@ static unsigned long last_valid_pfn;
|
|||
static void sun4u_pgprot_init(void);
|
||||
static void sun4v_pgprot_init(void);
|
||||
|
||||
static phys_addr_t __init available_memory(void)
|
||||
{
|
||||
phys_addr_t available = 0ULL;
|
||||
phys_addr_t pa_start, pa_end;
|
||||
u64 i;
|
||||
|
||||
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
|
||||
&pa_end, NULL)
|
||||
available = available + (pa_end - pa_start);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
|
||||
#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
|
||||
#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
|
||||
|
@ -2295,33 +2282,8 @@ static phys_addr_t __init available_memory(void)
|
|||
*/
|
||||
static void __init reduce_memory(phys_addr_t limit_ram)
|
||||
{
|
||||
phys_addr_t avail_ram = available_memory();
|
||||
phys_addr_t pa_start, pa_end;
|
||||
u64 i;
|
||||
|
||||
if (limit_ram >= avail_ram)
|
||||
return;
|
||||
|
||||
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
|
||||
&pa_end, NULL) {
|
||||
phys_addr_t region_size = pa_end - pa_start;
|
||||
phys_addr_t clip_start = pa_start;
|
||||
|
||||
avail_ram = avail_ram - region_size;
|
||||
/* Are we consuming too much? */
|
||||
if (avail_ram < limit_ram) {
|
||||
phys_addr_t give_back = limit_ram - avail_ram;
|
||||
|
||||
region_size = region_size - give_back;
|
||||
clip_start = clip_start + give_back;
|
||||
}
|
||||
|
||||
memblock_remove(clip_start, region_size);
|
||||
|
||||
if (avail_ram <= limit_ram)
|
||||
break;
|
||||
i = 0UL;
|
||||
}
|
||||
limit_ram += memblock_reserved_size();
|
||||
memblock_enforce_memory_limit(limit_ram);
|
||||
}
|
||||
|
||||
void __init paging_init(void)
|
||||
|
|
|
@ -175,16 +175,37 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
|
|||
}
|
||||
}
|
||||
|
||||
static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
|
||||
static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
|
||||
unsigned long offset, size_t len, bool per_page_flush)
|
||||
{
|
||||
struct iommu_struct *iommu = dev->archdata.iommu;
|
||||
int ioptex;
|
||||
iopte_t *iopte, *iopte0;
|
||||
phys_addr_t paddr = page_to_phys(page) + offset;
|
||||
unsigned long off = paddr & ~PAGE_MASK;
|
||||
unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
unsigned long pfn = __phys_to_pfn(paddr);
|
||||
unsigned int busa, busa0;
|
||||
int i;
|
||||
iopte_t *iopte, *iopte0;
|
||||
int ioptex, i;
|
||||
|
||||
/* XXX So what is maxphys for us and how do drivers know it? */
|
||||
if (!len || len > 256 * 1024)
|
||||
return DMA_MAPPING_ERROR;
|
||||
|
||||
/*
|
||||
* We expect unmapped highmem pages to be not in the cache.
|
||||
* XXX Is this a good assumption?
|
||||
* XXX What if someone else unmaps it here and races us?
|
||||
*/
|
||||
if (per_page_flush && !PageHighMem(page)) {
|
||||
unsigned long vaddr, p;
|
||||
|
||||
vaddr = (unsigned long)page_address(page) + offset;
|
||||
for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE)
|
||||
flush_page_for_dma(p);
|
||||
}
|
||||
|
||||
/* page color = pfn of page */
|
||||
ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page));
|
||||
ioptex = bit_map_string_get(&iommu->usemap, npages, pfn);
|
||||
if (ioptex < 0)
|
||||
panic("iommu out");
|
||||
busa0 = iommu->start + (ioptex << PAGE_SHIFT);
|
||||
|
@ -193,29 +214,15 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
|
|||
busa = busa0;
|
||||
iopte = iopte0;
|
||||
for (i = 0; i < npages; i++) {
|
||||
iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
|
||||
iopte_val(*iopte) = MKIOPTE(pfn, IOPERM);
|
||||
iommu_invalidate_page(iommu->regs, busa);
|
||||
busa += PAGE_SIZE;
|
||||
iopte++;
|
||||
page++;
|
||||
pfn++;
|
||||
}
|
||||
|
||||
iommu_flush_iotlb(iopte0, npages);
|
||||
|
||||
return busa0;
|
||||
}
|
||||
|
||||
static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
|
||||
unsigned long offset, size_t len)
|
||||
{
|
||||
void *vaddr = page_address(page) + offset;
|
||||
unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
|
||||
unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
|
||||
/* XXX So what is maxphys for us and how do drivers know it? */
|
||||
if (!len || len > 256 * 1024)
|
||||
return DMA_MAPPING_ERROR;
|
||||
return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
|
||||
return busa0 + off;
|
||||
}
|
||||
|
||||
static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
|
||||
|
@ -223,81 +230,58 @@ static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
|
|||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
flush_page_for_dma(0);
|
||||
return __sbus_iommu_map_page(dev, page, offset, len);
|
||||
return __sbus_iommu_map_page(dev, page, offset, len, false);
|
||||
}
|
||||
|
||||
static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
|
||||
struct page *page, unsigned long offset, size_t len,
|
||||
enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
void *vaddr = page_address(page) + offset;
|
||||
unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
|
||||
return __sbus_iommu_map_page(dev, page, offset, len, true);
|
||||
}
|
||||
|
||||
while (p < (unsigned long)vaddr + len) {
|
||||
flush_page_for_dma(p);
|
||||
p += PAGE_SIZE;
|
||||
static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs,
|
||||
bool per_page_flush)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int j;
|
||||
|
||||
for_each_sg(sgl, sg, nents, j) {
|
||||
sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg),
|
||||
sg->offset, sg->length, per_page_flush);
|
||||
if (sg->dma_address == DMA_MAPPING_ERROR)
|
||||
return 0;
|
||||
sg->dma_length = sg->length;
|
||||
}
|
||||
|
||||
return __sbus_iommu_map_page(dev, page, offset, len);
|
||||
return nents;
|
||||
}
|
||||
|
||||
static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i, n;
|
||||
|
||||
flush_page_for_dma(0);
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
|
||||
sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
|
||||
sg->dma_length = sg->length;
|
||||
}
|
||||
|
||||
return nents;
|
||||
return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false);
|
||||
}
|
||||
|
||||
static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long page, oldpage = 0;
|
||||
struct scatterlist *sg;
|
||||
int i, j, n;
|
||||
|
||||
for_each_sg(sgl, sg, nents, j) {
|
||||
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* We expect unmapped highmem pages to be not in the cache.
|
||||
* XXX Is this a good assumption?
|
||||
* XXX What if someone else unmaps it here and races us?
|
||||
*/
|
||||
if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
if (page != oldpage) { /* Already flushed? */
|
||||
flush_page_for_dma(page);
|
||||
oldpage = page;
|
||||
}
|
||||
page += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
|
||||
sg->dma_length = sg->length;
|
||||
}
|
||||
|
||||
return nents;
|
||||
return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
|
||||
}
|
||||
|
||||
static void iommu_release_one(struct device *dev, u32 busa, int npages)
|
||||
static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t len, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct iommu_struct *iommu = dev->archdata.iommu;
|
||||
int ioptex;
|
||||
int i;
|
||||
unsigned int busa = dma_addr & PAGE_MASK;
|
||||
unsigned long off = dma_addr & ~PAGE_MASK;
|
||||
unsigned int npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
|
||||
unsigned int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
|
||||
unsigned int i;
|
||||
|
||||
BUG_ON(busa < iommu->start);
|
||||
ioptex = (busa - iommu->start) >> PAGE_SHIFT;
|
||||
for (i = 0; i < npages; i++) {
|
||||
iopte_val(iommu->page_table[ioptex + i]) = 0;
|
||||
iommu_invalidate_page(iommu->regs, busa);
|
||||
|
@ -306,25 +290,15 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
|
|||
bit_map_clear(&iommu->usemap, ioptex, npages);
|
||||
}
|
||||
|
||||
static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
|
||||
size_t len, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
unsigned long off = dma_addr & ~PAGE_MASK;
|
||||
int npages;
|
||||
|
||||
npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
|
||||
iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
|
||||
}
|
||||
|
||||
static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
|
||||
int nents, enum dma_data_direction dir, unsigned long attrs)
|
||||
{
|
||||
struct scatterlist *sg;
|
||||
int i, n;
|
||||
int i;
|
||||
|
||||
for_each_sg(sgl, sg, nents, i) {
|
||||
n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
|
||||
iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
|
||||
sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir,
|
||||
attrs);
|
||||
sg->dma_address = 0x21212121;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,7 +68,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg
|
|||
CFLAGS_REMOVE_vclock_gettime.o = -pg
|
||||
|
||||
$(obj)/%.so: OBJCOPYFLAGS := -S
|
||||
$(obj)/%.so: $(obj)/%.so.dbg
|
||||
$(obj)/%.so: $(obj)/%.so.dbg FORCE
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
* the recommended way for applications to use the coprocessor, and
|
||||
* the driver interface is not intended for general use.
|
||||
*
|
||||
* See Documentation/sparc/oradax/oracle-dax.txt for more details.
|
||||
* See Documentation/sparc/oradax/oracle-dax.rst for more details.
|
||||
*/
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
|
|
Loading…
Reference in New Issue