Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze
* 'next' of git://git.monstr.eu/linux-2.6-microblaze:
  microblaze: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW usage
  microblaze: Use delay slot in __strnlen_user, __strncpy_user
  microblaze: Remove NET_IP_ALIGN from system.h
  microblaze: Add __ucmpdi2() helper function
  microblaze: Raise SIGFPE/FPE_INTDIV for div by zero
  microblaze: Switch ELF_ARCH code to 189
  microblaze: Added DMA sync operations
  microblaze: Moved __dma_sync() to dma-mapping.h
  microblaze: Add PVR for Microblaze v8.20.a
  microblaze: Fix access_ok macro
  microblaze: Add loop unrolling for PAGE in copy_tofrom_user
  microblaze: Simplify logic for unaligned byte copying
  microblaze: Change label names - copy_tofrom_user
  microblaze: Separate fixup section definition
  microblaze: Change label name in copy_tofrom_user
  microblaze: Clear top bit from cnt32_to_63
commit b1c907f3b2
arch/microblaze/include/asm/dma-mapping.h
@@ -28,12 +28,12 @@
 #include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm-generic/dma-coherent.h>
+#include <asm/cacheflush.h>

 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)

 #define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
 #define __dma_free_coherent(size, addr)		((void)0)
+#define __dma_sync(addr, size, rw)		((void)0)

 static inline unsigned long device_to_mask(struct device *dev)
 {
@@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)

 #include <asm-generic/dma-mapping-common.h>

+static inline void __dma_sync(unsigned long paddr,
+				size_t size, enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		flush_dcache_range(paddr, paddr + size);
+		break;
+	case DMA_FROM_DEVICE:
+		invalidate_dcache_range(paddr, paddr + size);
+		break;
+	default:
+		BUG();
+	}
+}
+
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
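Note: the new inline is the standard cache-maintenance contract for a non-coherent CPU — flush dirty lines before the device reads memory, invalidate stale lines before the CPU reads what the device wrote. A minimal usage sketch (the buffer names and the virt_to_phys() step are illustrative, not part of this patch):

	/* CPU filled tx_buf; push dirty lines to RAM before the device fetches it */
	__dma_sync(virt_to_phys(tx_buf), tx_len, DMA_TO_DEVICE);

	/* device filled rx_buf; discard stale lines before the CPU reads it */
	__dma_sync(virt_to_phys(rx_buf), rx_len, DMA_FROM_DEVICE);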
@@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 			enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
-	__dma_sync(vaddr, size, (int)direction);
+	__dma_sync(virt_to_phys(vaddr), size, (int)direction);
 }

 #endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */

arch/microblaze/include/asm/elf.h
@@ -16,13 +16,15 @@
  * I've snaffled the value from the microblaze binutils source code
  * /binutils/microblaze/include/elf/microblaze.h
  */
-#define EM_XILINX_MICROBLAZE	0xbaab
-#define ELF_ARCH		EM_XILINX_MICROBLAZE
+#define EM_MICROBLAZE		189
+#define EM_MICROBLAZE_OLD	0xbaab
+#define ELF_ARCH		EM_MICROBLAZE

 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(x) ((x)->e_machine == EM_XILINX_MICROBLAZE)
+#define elf_check_arch(x) ((x)->e_machine == EM_MICROBLAZE \
+				|| (x)->e_machine == EM_MICROBLAZE_OLD)

 /*
  * These are used to set parameters in the core dumps.
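Note: 189 is the officially registered ELF e_machine value for MicroBlaze; 0xbaab was the interim number the toolchain used before registration. Accepting both in elf_check_arch() keeps binaries produced by older toolchains loadable. A userspace equivalent of the check, for illustration only (the function name is made up here):

	#include <elf.h>

	/* mirrors the new elf_check_arch(): official EM_MICROBLAZE (189)
	 * or the legacy interim value 0xbaab */
	static int is_microblaze(const Elf32_Ehdr *eh)
	{
		return eh->e_machine == 189 || eh->e_machine == 0xbaab;
	}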
arch/microblaze/include/asm/system.h
@@ -17,8 +17,6 @@
 #include <asm-generic/cmpxchg.h>
 #include <asm-generic/cmpxchg-local.h>

-#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
-
 struct task_struct;
 struct thread_info;

@@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root;

 #define arch_align_stack(x) (x)

-/*
- * MicroBlaze doesn't handle unaligned accesses in hardware.
- *
- * Based on this we force the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN	2
-
 #endif /* _ASM_MICROBLAZE_SYSTEM_H */

arch/microblaze/include/asm/uaccess.h
@@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size)
  * - "addr", "addr + size" and "size" are all below the limit
  */
 #define access_ok(type, addr, size) \
-	(get_fs().seg > (((unsigned long)(addr)) | \
+	(get_fs().seg >= (((unsigned long)(addr)) | \
 		(size) | ((unsigned long)(addr) + (size))))

 /* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",
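Note: the OR trick works because seg is an all-ones-below-a-bit limit; the access is legal iff addr, size and addr + size all stay at or below it. The old strict '>' rejected a buffer ending exactly at the limit. Worked example with assumed values (not from the patch):

	seg  = 0x7fffffff
	addr = 0x7ffffff0, size = 0xf
	addr | size | (addr + size) = 0x7fffffff
	old: seg >  0x7fffffff -> false (legal access refused)
	new: seg >= 0x7fffffff -> true  (accepted)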
arch/microblaze/kernel/cpu/cpuinfo.c
@@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
 	{"8.00.a", 0x12},
 	{"8.00.b", 0x13},
 	{"8.10.a", 0x14},
+	{"8.20.a", 0x15},
 	{NULL, 0},
 };

arch/microblaze/kernel/dma.c
@@ -11,7 +11,6 @@
 #include <linux/gfp.h>
 #include <linux/dma-debug.h>
 #include <asm/bug.h>
-#include <asm/cacheflush.h>

 /*
  * Generic direct DMA implementation

@@ -21,21 +20,6 @@
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-static inline void __dma_sync_page(unsigned long paddr, unsigned long offset,
-				   size_t size, enum dma_data_direction direction)
-{
-	switch (direction) {
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		flush_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	default:
-		BUG();
-	}
-}

 static unsigned long get_dma_direct_offset(struct device *dev)
 {

@@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 	/* FIXME this part of code is untested */
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
-		__dma_sync_page(page_to_phys(sg_page(sg)), sg->offset,
+		__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
 							sg->length, direction);
 	}

@@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     enum dma_data_direction direction,
 					     struct dma_attrs *attrs)
 {
-	__dma_sync_page(page_to_phys(page), offset, size, direction);
+	__dma_sync(page_to_phys(page) + offset, size, direction);
 	return page_to_phys(page) + offset + get_dma_direct_offset(dev);
 }

@@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev,
  * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
  * dma_address is physical address
  */
-	__dma_sync_page(dma_address, 0 , size, direction);
+	__dma_sync(dma_address, size, direction);
 }

+static inline void
+dma_direct_sync_single_for_cpu(struct device *dev,
+			       dma_addr_t dma_handle, size_t size,
+			       enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to flush the cache as the memory segment
+	 * is given to the CPU
+	 */
+
+	if (direction == DMA_FROM_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_device(struct device *dev,
+				  dma_addr_t dma_handle, size_t size,
+				  enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to invalidate the cache if the device isn't
+	 * supposed to write to the relevant region
+	 */
+
+	if (direction == DMA_TO_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_cpu(struct device *dev,
+			   struct scatterlist *sgl, int nents,
+			   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_FROM_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_device(struct device *dev,
+			      struct scatterlist *sgl, int nents,
+			      enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_TO_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
+}
+
 struct dma_map_ops dma_direct_ops = {

@@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = {
 	.dma_supported = dma_direct_dma_supported,
 	.map_page = dma_direct_map_page,
 	.unmap_page = dma_direct_unmap_page,
+	.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
+	.sync_single_for_device = dma_direct_sync_single_for_device,
+	.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
+	.sync_sg_for_device = dma_direct_sync_sg_for_device,
 };
 EXPORT_SYMBOL(dma_direct_ops);

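Note: with the four sync ops wired into dma_direct_ops, the generic dma_sync_single_for_{cpu,device}() entry points from asm-generic/dma-mapping-common.h become functional, so a driver can keep reusing one streaming mapping across transfers instead of remapping. Usage sketch (dev, rx_handle and rx_len are illustrative names):

	/* device has DMA'd data in; make it visible to the CPU */
	dma_sync_single_for_cpu(dev, rx_handle, rx_len, DMA_FROM_DEVICE);
	/* ... CPU parses the buffer ... */
	/* hand the buffer back to the device for the next receive */
	dma_sync_single_for_device(dev, rx_handle, rx_len, DMA_FROM_DEVICE);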
arch/microblaze/kernel/exceptions.c
@@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 	case MICROBLAZE_DIV_ZERO_EXCEPTION:
 		if (user_mode(regs)) {
 			pr_debug("Divide by zero exception in user mode\n");
-			_exception(SIGILL, regs, FPE_INTDIV, addr);
+			_exception(SIGFPE, regs, FPE_INTDIV, addr);
 			return;
 		}
 		printk(KERN_WARNING "Divide by zero exception " \

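Note: SIGFPE with si_code FPE_INTDIV is what POSIX prescribes for an integer divide by zero; SIGILL was misleading. A small userspace check (illustrative; the trap only fires on cores configured with a hardware divider):

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>

	static void on_fpe(int sig, siginfo_t *si, void *ctx)
	{
		printf("sig=%d si_code=%d\n", sig, si->si_code); /* expect FPE_INTDIV */
		exit(0);
	}

	int main(void)
	{
		struct sigaction sa;
		volatile int zero = 0;

		sa.sa_flags = SA_SIGINFO;
		sa.sa_sigaction = on_fpe;
		sigemptyset(&sa.sa_mask);
		sigaction(SIGFPE, &sa, NULL);
		return 1 / zero;	/* raises the div-zero exception */
	}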
arch/microblaze/kernel/process.c
@@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,

 	ti->cpu_context.msr = (childregs->msr|MSR_VM);
 	ti->cpu_context.msr &= ~MSR_UMS;	/* switch_to to kernel mode */
+	ti->cpu_context.msr &= ~MSR_IE;
 #endif
 	ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;

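Note: with __ARCH_WANT_INTERRUPTS_ON_CTXSW gone (see the system.h hunk above), switch_to() now runs with interrupts disabled, so the MSR image a freshly created child resumes from must have IE clear as well; interrupts come back on when the child exits through ret_from_fork. The three statements boil down to (illustrative rewrite, not in the patch):

	ti->cpu_context.msr = (childregs->msr | MSR_VM) & ~(MSR_UMS | MSR_IE);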
arch/microblaze/kernel/ptrace.c
@@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		ret = -1L;

 	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
 				    regs->r5, regs->r6,
 				    regs->r7, regs->r8);

arch/microblaze/kernel/timer.c
@@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void)
 {
 	if (timer_initialized) {
 		struct clocksource *cs = &clocksource_microblaze;
-		cycle_t cyc = cnt32_to_63(cs->read(NULL));
+
+		cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX;
 		return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
 	}
 	return 0;
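Note: cnt32_to_63() extends a 32-bit counter to 63 bits but reuses the MSB of its 64-bit result as an internal generation flag, so callers are expected to ignore the top bit. Masking with LLONG_MAX (0x7fffffffffffffff) clears it before the cycles-to-ns conversion, keeping sched_clock() from ever handing the scheduler a huge bogus timestamp. In short:

	u64 raw = cnt32_to_63(counter);	/* bit 63 may be set as a flag */
	u64 cyc = raw & LLONG_MAX;	/* clean 63-bit cycle count */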
arch/microblaze/lib/Makefile
@@ -25,5 +25,6 @@ lib-y += lshrdi3.o
 lib-y += modsi3.o
 lib-y += muldi3.o
 lib-y += mulsi3.o
+lib-y += ucmpdi2.o
 lib-y += udivsi3.o
 lib-y += umodsi3.o

arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@

 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>

 /*
  * int __strncpy_user(char *to, char *from, int len);

@@ -33,8 +34,8 @@ __strncpy_user:
 	 * r3 - temp count
	 * r4 - temp val
	 */
+	beqid	r7,3f
 	addik	r3,r7,0		/* temp_count = len */
-	beqi	r3,3f
 1:
 	lbu	r4,r6,r0
 	sb	r4,r5,r0

@@ -76,8 +77,8 @@ __strncpy_user:
 	.type  __strnlen_user, @function
 	.align 4;
 __strnlen_user:
+	beqid	r6,3f
 	addik	r3,r6,0
-	beqi	r3,3f
 1:
 	lbu	r4,r5,r0
 	beqid	r4,2f		/* break on NUL */
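Note: beqid is the delay-slot form of beqi — the instruction immediately after a delayed branch executes whether or not the branch is taken. Branching on the length register directly and moving the addik into the slot does the same work one cycle faster; the old sequence spent an instruction copying the count before it could test it:

	addik	r3,r7,0		/* old: copy first ...          */
	beqi	r3,3f		/* ... then test the copy       */

	beqid	r7,3f		/* new: test r7, branch delayed */
	addik	r3,r7,0		/* executes in the delay slot   */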
@@ -102,6 +103,49 @@ __strnlen_user:
 	.section	__ex_table,"a"
 	.word	1b,4b

+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset)	\
+1:	lwi	r4 , r6, 0x0000 + offset;	\
+2:	lwi	r19, r6, 0x0004 + offset;	\
+3:	lwi	r20, r6, 0x0008 + offset;	\
+4:	lwi	r21, r6, 0x000C + offset;	\
+5:	lwi	r22, r6, 0x0010 + offset;	\
+6:	lwi	r23, r6, 0x0014 + offset;	\
+7:	lwi	r24, r6, 0x0018 + offset;	\
+8:	lwi	r25, r6, 0x001C + offset;	\
+9:	swi	r4 , r5, 0x0000 + offset;	\
+10:	swi	r19, r5, 0x0004 + offset;	\
+11:	swi	r20, r5, 0x0008 + offset;	\
+12:	swi	r21, r5, 0x000C + offset;	\
+13:	swi	r22, r5, 0x0010 + offset;	\
+14:	swi	r23, r5, 0x0014 + offset;	\
+15:	swi	r24, r5, 0x0018 + offset;	\
+16:	swi	r25, r5, 0x001C + offset;	\
+	.section __ex_table,"a";	\
+	.word	1b, 0f;	\
+	.word	2b, 0f;	\
+	.word	3b, 0f;	\
+	.word	4b, 0f;	\
+	.word	5b, 0f;	\
+	.word	6b, 0f;	\
+	.word	7b, 0f;	\
+	.word	8b, 0f;	\
+	.word	9b, 0f;	\
+	.word	10b, 0f;	\
+	.word	11b, 0f;	\
+	.word	12b, 0f;	\
+	.word	13b, 0f;	\
+	.word	14b, 0f;	\
+	.word	15b, 0f;	\
+	.word	16b, 0f;	\
+	.text
+
+#define COPY_80(offset)	\
+	COPY(0x00 + offset);	\
+	COPY(0x20 + offset);	\
+	COPY(0x40 + offset);	\
+	COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
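Note: each COPY() moves 0x20 bytes — eight lwi loads into r4 and r19-r25, then eight swi stores, with every access backed by an __ex_table entry pointing at the common fixup label 0:. COPY_80() chains four COPY()s for 0x80 bytes, and the page loop below expands four COPY_80()s, i.e. 0x200 bytes per pass, so one 0x1000-byte page takes eight passes. The shape of it in C (illustrative only; the real implementation is the assembly):

	#include <stddef.h>
	#include <stdint.h>

	static void page_copy_shape(uint32_t *to, const uint32_t *from, size_t len)
	{
		while (len) {			/* len == 0x1000 for a page */
			size_t i;
			for (i = 0; i < 0x200 / 4; i++)	/* fully unrolled in asm */
				to[i] = from[i];
			to += 0x200 / 4;
			from += 0x200 / 4;
			len -= 0x200;
		}
	}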
@@ -119,34 +163,79 @@ __copy_tofrom_user:
 	 * r7, r3 - count
	 * r4 - tempval
	 */
-	beqid	r7, 3f /* zero size is not likely */
-	andi	r3, r7, 0x3 /* filter add count */
-	bneid	r3, 4f /* if is odd value then byte copying */
+	beqid	r7, 0f /* zero size is not likely */
 	or	r3, r5, r6 /* find if is any to/from unaligned */
-	andi	r3, r3, 0x3 /* mask unaligned */
-	bneid	r3, 1f /* it is unaligned -> then jump */
+	or	r3, r3, r7 /* find if count is unaligned */
+	andi	r3, r3, 0x3 /* mask last 3 bits */
+	bneid	r3, bu1 /* if r3 is not zero then byte copying */
 	or	r3, r0, r0

-/* at least one 4 byte copy */
-5:	lw	r4, r6, r3
-6:	sw	r4, r5, r3
+	rsubi	r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+	beqid	r3, page;
+	or	r3, r0, r0
+
+w1:	lw	r4, r6, r3 /* at least one 4 byte copy */
+w2:	sw	r4, r5, r3
 	addik	r7, r7, -4
-	bneid	r7, 5b
+	bneid	r7, w1
 	addik	r3, r3, 4
+
 	addik	r3, r7, 0
 	rtsd	r15, 8
 	nop
-4:	or	r3, r0, r0
-1:	lbu	r4,r6,r3
-2:	sb	r4,r5,r3
+
+	.section	__ex_table,"a"
+	.word	w1, 0f;
+	.word	w2, 0f;
+	.text
+
+	.align 4 /* Alignment is important to keep icache happy */
+page:	/* Create room on stack and save registers for storing values */
+	addik	r1, r1, -32
+	swi	r19, r1, 4
+	swi	r20, r1, 8
+	swi	r21, r1, 12
+	swi	r22, r1, 16
+	swi	r23, r1, 20
+	swi	r24, r1, 24
+	swi	r25, r1, 28
+loop:	/* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+	/* Loop unrolling to get performance boost */
+	COPY_80(0x000);
+	COPY_80(0x080);
+	COPY_80(0x100);
+	COPY_80(0x180);
+	/* copy loop */
+	addik	r6, r6, 0x200
+	addik	r7, r7, -0x200
+	bneid	r7, loop
+	addik	r5, r5, 0x200
+	/* Restore register content */
+	lwi	r19, r1, 4
+	lwi	r20, r1, 8
+	lwi	r21, r1, 12
+	lwi	r22, r1, 16
+	lwi	r23, r1, 20
+	lwi	r24, r1, 24
+	lwi	r25, r1, 28
+	addik	r1, r1, 32
+	/* return back */
+	addik	r3, r7, 0
+	rtsd	r15, 8
+	nop
+
+	.align 4 /* Alignment is important to keep icache happy */
+bu1:	lbu	r4,r6,r3
+bu2:	sb	r4,r5,r3
 	addik	r7,r7,-1
-	bneid	r7,1b
+	bneid	r7,bu1
 	addik	r3,r3,1 /* delay slot */
-3:
+0:
 	addik	r3,r7,0
 	rtsd	r15,8
 	nop
 	.size	__copy_tofrom_user, . - __copy_tofrom_user

 	.section	__ex_table,"a"
-	.word	1b,3b,2b,3b,5b,3b,6b,3b
+	.word	bu1, 0b;
+	.word	bu2, 0b;
 	.text

arch/microblaze/lib/ucmpdi2.c (new file)
@@ -0,0 +1,20 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	const DWunion au = {.ll = a};
+	const DWunion bu = {.ll = b};
+
+	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+		return 0;
+	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+		return 2;
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+	return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);
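Note: __ucmpdi2() is a libgcc helper — gcc emits calls to it for unsigned 64-bit comparisons it does not inline on 32-bit targets, and a kernel that does not link libgcc must supply its own copy (hence the Makefile hunk above). The return value encodes the ordering: 0 for a < b, 1 for a == b, 2 for a > b. Illustrative values:

	__ucmpdi2(0x100000000ULL, 0xffffffffULL) == 2	/* high words: 1 > 0 */
	__ucmpdi2(5ULL, 5ULL)                    == 1
	__ucmpdi2(1ULL, 2ULL)                    == 0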