Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze

* 'next' of git://git.monstr.eu/linux-2.6-microblaze:
  microblaze: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW usage
  microblaze: Use delay slot in __strnlen_user, __strncpy_user
  microblaze: Remove NET_IP_ALIGN from system.h
  microblaze: Add __ucmpdi2() helper function
  microblaze: Raise SIGFPE/FPE_INTDIV for div by zero
  microblaze: Switch ELF_ARCH code to 189
  microblaze: Added DMA sync operations
  microblaze: Moved __dma_sync() to dma-mapping.h
  microblaze: Add PVR for Microblaze v8.20.a
  microblaze: Fix access_ok macro
  microblaze: Add loop unrolling for PAGE in copy_tofrom_user
  microblaze: Simplify logic for unaligned byte copying
  microblaze: Change label names - copy_tofrom_user
  microblaze: Separate fixup section definition
  microblaze: Change label name in copy_tofrom_user
  microblaze: Clear top bit from cnt32_to_63
This commit is contained in:
Linus Torvalds 2011-10-31 16:13:44 -07:00
commit b1c907f3b2
13 changed files with 220 additions and 54 deletions

View File

@ -28,12 +28,12 @@
#include <linux/dma-attrs.h>
#include <asm/io.h>
#include <asm-generic/dma-coherent.h>
#include <asm/cacheflush.h>
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
#define __dma_alloc_coherent(dev, gfp, size, handle) NULL
#define __dma_free_coherent(size, addr) ((void)0)
#define __dma_sync(addr, size, rw) ((void)0)
static inline unsigned long device_to_mask(struct device *dev)
{
@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)
#include <asm-generic/dma-mapping-common.h>
/*
 * __dma_sync - perform data-cache maintenance on a buffer for DMA
 * @paddr: start address of the range
 * @size: length of the range in bytes
 * @direction: DMA transfer direction the range is used for
 *
 * DMA_TO_DEVICE and DMA_BIDIRECTIONAL call flush_dcache_range() on
 * [paddr, paddr + size); DMA_FROM_DEVICE calls invalidate_dcache_range()
 * on the same range. Any other direction value is a caller bug.
 */
static inline void __dma_sync(unsigned long paddr,
			      size_t size, enum dma_data_direction direction)
{
	const unsigned long end = paddr + size;

	if (direction == DMA_TO_DEVICE || direction == DMA_BIDIRECTIONAL) {
		flush_dcache_range(paddr, end);
		return;
	}

	if (direction == DMA_FROM_DEVICE) {
		invalidate_dcache_range(paddr, end);
		return;
	}

	/* DMA_NONE or a corrupted value must never reach this point */
	BUG();
}
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
struct dma_map_ops *ops = get_dma_ops(dev);
@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
BUG_ON(direction == DMA_NONE);
__dma_sync(vaddr, size, (int)direction);
__dma_sync(virt_to_phys(vaddr), size, (int)direction);
}
#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */

View File

@ -16,13 +16,15 @@
* I've snaffled the value from the microblaze binutils source code
* /binutils/microblaze/include/elf/microblaze.h
*/
#define EM_XILINX_MICROBLAZE 0xbaab
#define ELF_ARCH EM_XILINX_MICROBLAZE
#define EM_MICROBLAZE 189
#define EM_MICROBLAZE_OLD 0xbaab
#define ELF_ARCH EM_MICROBLAZE
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
#define elf_check_arch(x) ((x)->e_machine == EM_XILINX_MICROBLAZE)
#define elf_check_arch(x) ((x)->e_machine == EM_MICROBLAZE \
|| (x)->e_machine == EM_MICROBLAZE_OLD)
/*
* These are used to set parameters in the core dumps.

View File

@ -17,8 +17,6 @@
#include <asm-generic/cmpxchg.h>
#include <asm-generic/cmpxchg-local.h>
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
struct task_struct;
struct thread_info;
@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root;
#define arch_align_stack(x) (x)
/*
* MicroBlaze doesn't handle unaligned accesses in hardware.
*
* Based on this we force the IP header alignment in network drivers.
*/
#define NET_IP_ALIGN 2
#endif /* _ASM_MICROBLAZE_SYSTEM_H */

View File

@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size)
* - "addr", "addr + size" and "size" are all below the limit
*/
#define access_ok(type, addr, size) \
(get_fs().seg > (((unsigned long)(addr)) | \
(get_fs().seg >= (((unsigned long)(addr)) | \
(size) | ((unsigned long)(addr) + (size))))
/* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",

View File

@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
{"8.00.a", 0x12},
{"8.00.b", 0x13},
{"8.10.a", 0x14},
{"8.20.a", 0x15},
{NULL, 0},
};

View File

@ -11,7 +11,6 @@
#include <linux/gfp.h>
#include <linux/dma-debug.h>
#include <asm/bug.h>
#include <asm/cacheflush.h>
/*
* Generic direct DMA implementation
@ -21,21 +20,6 @@
* can set archdata.dma_data to an unsigned long holding the offset. By
* default the offset is PCI_DRAM_OFFSET.
*/
/*
 * __dma_sync_page - cache maintenance for part of a page used for DMA
 * @paddr: address of the page
 * @offset: byte offset of the buffer inside the page
 * @size: length of the buffer in bytes
 * @direction: DMA transfer direction
 *
 * The range [paddr + offset, paddr + offset + size) is passed to
 * flush_dcache_range() for DMA_TO_DEVICE and DMA_BIDIRECTIONAL, and to
 * invalidate_dcache_range() for DMA_FROM_DEVICE. Other directions BUG().
 */
static inline void __dma_sync_page(unsigned long paddr, unsigned long offset,
				   size_t size, enum dma_data_direction direction)
{
	const unsigned long start = paddr + offset;
	const unsigned long end = start + size;

	if (direction == DMA_TO_DEVICE || direction == DMA_BIDIRECTIONAL) {
		flush_dcache_range(start, end);
		return;
	}

	if (direction == DMA_FROM_DEVICE) {
		invalidate_dcache_range(start, end);
		return;
	}

	/* No valid transfer direction was supplied */
	BUG();
}
static unsigned long get_dma_direct_offset(struct device *dev)
{
@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
/* FIXME this part of code is untested */
for_each_sg(sgl, sg, nents, i) {
sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
__dma_sync_page(page_to_phys(sg_page(sg)), sg->offset,
__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
sg->length, direction);
}
@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
__dma_sync_page(page_to_phys(page), offset, size, direction);
__dma_sync(page_to_phys(page) + offset, size, direction);
return page_to_phys(page) + offset + get_dma_direct_offset(dev);
}
@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev,
* phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
* dma_address is physical address
*/
__dma_sync_page(dma_address, 0 , size, direction);
__dma_sync(dma_address, size, direction);
}
/*
 * dma_direct_sync_single_for_cpu - make a DMA buffer safe for the CPU to read
 * @dev: device owning the mapping (not used by this implementation)
 * @dma_handle: address of the buffer, handed to __dma_sync() unchanged
 * @size: length of the buffer in bytes
 * @direction: direction the buffer was mapped with
 *
 * NOTE(review): dma_direct_map_page() returns the physical address plus
 * get_dma_direct_offset(dev), while __dma_sync() is given dma_handle as-is;
 * confirm the offset is zero on the platforms that use this path.
 */
static inline void
dma_direct_sync_single_for_cpu(struct device *dev,
			       dma_addr_t dma_handle, size_t size,
			       enum dma_data_direction direction)
{
	/*
	 * It's pointless to flush the cache as the memory segment
	 * is given to the CPU. What matters is dropping stale cache lines
	 * whenever the device may have written the buffer, which is the
	 * case for DMA_BIDIRECTIONAL as well as DMA_FROM_DEVICE.
	 * DMA_FROM_DEVICE is passed explicitly so that __dma_sync() takes
	 * its invalidate path for both.
	 */
	if (direction == DMA_FROM_DEVICE || direction == DMA_BIDIRECTIONAL)
		__dma_sync(dma_handle, size, DMA_FROM_DEVICE);
}
/*
 * dma_direct_sync_single_for_device - hand a DMA buffer back to the device
 * @dev: device owning the mapping (not used by this implementation)
 * @dma_handle: address of the buffer, handed to __dma_sync() unchanged
 * @size: length of the buffer in bytes
 * @direction: direction the buffer was mapped with
 *
 * NOTE(review): dma_direct_map_page() returns the physical address plus
 * get_dma_direct_offset(dev), while __dma_sync() is given dma_handle as-is;
 * confirm the offset is zero on the platforms that use this path.
 */
static inline void
dma_direct_sync_single_for_device(struct device *dev,
				  dma_addr_t dma_handle, size_t size,
				  enum dma_data_direction direction)
{
	/*
	 * It's pointless to invalidate the cache if the device isn't
	 * supposed to write to the relevant region. CPU writes, however,
	 * must be flushed whenever the device is going to read the buffer,
	 * which includes DMA_BIDIRECTIONAL mappings; __dma_sync() flushes
	 * for both of these directions.
	 */
	if (direction == DMA_TO_DEVICE || direction == DMA_BIDIRECTIONAL)
		__dma_sync(dma_handle, size, direction);
}
static inline void
dma_direct_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sgl, int nents,
enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
/* FIXME this part of code is untested */
if (direction == DMA_FROM_DEVICE)
for_each_sg(sgl, sg, nents, i)
__dma_sync(sg->dma_address, sg->length, direction);
}
static inline void
dma_direct_sync_sg_for_device(struct device *dev,
struct scatterlist *sgl, int nents,
enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
/* FIXME this part of code is untested */
if (direction == DMA_TO_DEVICE)
for_each_sg(sgl, sg, nents, i)
__dma_sync(sg->dma_address, sg->length, direction);
}
struct dma_map_ops dma_direct_ops = {
@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = {
.dma_supported = dma_direct_dma_supported,
.map_page = dma_direct_map_page,
.unmap_page = dma_direct_unmap_page,
.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
.sync_single_for_device = dma_direct_sync_single_for_device,
.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
.sync_sg_for_device = dma_direct_sync_sg_for_device,
};
EXPORT_SYMBOL(dma_direct_ops);

View File

@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
case MICROBLAZE_DIV_ZERO_EXCEPTION:
if (user_mode(regs)) {
pr_debug("Divide by zero exception in user mode\n");
_exception(SIGILL, regs, FPE_INTDIV, addr);
_exception(SIGFPE, regs, FPE_INTDIV, addr);
return;
}
printk(KERN_WARNING "Divide by zero exception " \

View File

@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
ti->cpu_context.msr = (childregs->msr|MSR_VM);
ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
ti->cpu_context.msr &= ~MSR_IE;
#endif
ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;

View File

@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
ret = -1L;
if (unlikely(current->audit_context))
audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
audit_syscall_entry(EM_MICROBLAZE, regs->r12,
regs->r5, regs->r6,
regs->r7, regs->r8);

View File

@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void)
{
if (timer_initialized) {
struct clocksource *cs = &clocksource_microblaze;
cycle_t cyc = cnt32_to_63(cs->read(NULL));
cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX;
return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
}
return 0;

View File

@ -25,5 +25,6 @@ lib-y += lshrdi3.o
lib-y += modsi3.o
lib-y += muldi3.o
lib-y += mulsi3.o
lib-y += ucmpdi2.o
lib-y += udivsi3.o
lib-y += umodsi3.o

View File

@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/page.h>
/*
* int __strncpy_user(char *to, char *from, int len);
@ -33,8 +34,8 @@ __strncpy_user:
* r3 - temp count
* r4 - temp val
*/
beqid r7,3f
addik r3,r7,0 /* temp_count = len */
beqi r3,3f
1:
lbu r4,r6,r0
sb r4,r5,r0
@ -76,8 +77,8 @@ __strncpy_user:
.type __strnlen_user, @function
.align 4;
__strnlen_user:
beqid r6,3f
addik r3,r6,0
beqi r3,3f
1:
lbu r4,r5,r0
beqid r4,2f /* break on NUL */
@ -102,6 +103,49 @@ __strnlen_user:
.section __ex_table,"a"
.word 1b,4b
/*
 * Loop unrolling for __copy_tofrom_user
 *
 * COPY(offset) moves one 0x20-byte chunk: eight lwi loads from
 * r6 + offset .. r6 + offset + 0x1C into r4 and r19-r25, followed by
 * eight swi stores of those registers to the same offsets from r5.
 * r5 and r6 themselves are not modified by the macro.
 *
 * Every load and store carries its own local label (1-16) and the macro
 * emits one __ex_table entry per label, each paired with the next local
 * label 0 (0f), before switching back to .text.
 *
 * NOTE(review): the macro is expanded after r1 has been lowered by 32 and
 * r19-r25 have been saved on the stack; confirm that the code at the 0:
 * label undoes that frame when it is entered from one of these entries.
 */
#define COPY(offset) \
1: lwi r4 , r6, 0x0000 + offset; \
2: lwi r19, r6, 0x0004 + offset; \
3: lwi r20, r6, 0x0008 + offset; \
4: lwi r21, r6, 0x000C + offset; \
5: lwi r22, r6, 0x0010 + offset; \
6: lwi r23, r6, 0x0014 + offset; \
7: lwi r24, r6, 0x0018 + offset; \
8: lwi r25, r6, 0x001C + offset; \
9: swi r4 , r5, 0x0000 + offset; \
10: swi r19, r5, 0x0004 + offset; \
11: swi r20, r5, 0x0008 + offset; \
12: swi r21, r5, 0x000C + offset; \
13: swi r22, r5, 0x0010 + offset; \
14: swi r23, r5, 0x0014 + offset; \
15: swi r24, r5, 0x0018 + offset; \
16: swi r25, r5, 0x001C + offset; \
.section __ex_table,"a"; \
.word 1b, 0f; \
.word 2b, 0f; \
.word 3b, 0f; \
.word 4b, 0f; \
.word 5b, 0f; \
.word 6b, 0f; \
.word 7b, 0f; \
.word 8b, 0f; \
.word 9b, 0f; \
.word 10b, 0f; \
.word 11b, 0f; \
.word 12b, 0f; \
.word 13b, 0f; \
.word 14b, 0f; \
.word 15b, 0f; \
.word 16b, 0f; \
.text
/*
 * COPY_80(offset) expands COPY four times, at offset + 0x00, + 0x20,
 * + 0x40 and + 0x60, i.e. it covers 0x80 consecutive bytes starting at
 * the given offset from r6 (source) and r5 (destination).
 */
#define COPY_80(offset) \
COPY(0x00 + offset);\
COPY(0x20 + offset);\
COPY(0x40 + offset);\
COPY(0x60 + offset);
/*
* int __copy_tofrom_user(char *to, char *from, int len)
* Return:
@ -119,34 +163,79 @@ __copy_tofrom_user:
* r7, r3 - count
* r4 - tempval
*/
beqid r7, 3f /* zero size is not likely */
andi r3, r7, 0x3 /* filter add count */
bneid r3, 4f /* if is odd value then byte copying */
beqid r7, 0f /* zero size is not likely */
or r3, r5, r6 /* find if is any to/from unaligned */
andi r3, r3, 0x3 /* mask unaligned */
bneid r3, 1f /* it is unaligned -> then jump */
or r3, r3, r7 /* find if count is unaligned */
andi r3, r3, 0x3 /* mask last 3 bits */
bneid r3, bu1 /* if r3 is not zero then byte copying */
or r3, r0, r0
/* at least one 4 byte copy */
5: lw r4, r6, r3
6: sw r4, r5, r3
rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
beqid r3, page;
or r3, r0, r0
w1: lw r4, r6, r3 /* at least one 4 byte copy */
w2: sw r4, r5, r3
addik r7, r7, -4
bneid r7, 5b
bneid r7, w1
addik r3, r3, 4
addik r3, r7, 0
rtsd r15, 8
nop
4: or r3, r0, r0
1: lbu r4,r6,r3
2: sb r4,r5,r3
.section __ex_table,"a"
.word w1, 0f;
.word w2, 0f;
.text
.align 4 /* Alignment is important to keep icache happy */
page: /* Create room on stack and save registers for storing values */
/* Frame is 32 bytes below the old r1; r19-r25 go to offsets 4..28 */
addik r1, r1, -32
swi r19, r1, 4
swi r20, r1, 8
swi r21, r1, 12
swi r22, r1, 16
swi r23, r1, 20
swi r24, r1, 24
swi r25, r1, 28
loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
/* Loop unrolling to get performance boost */
COPY_80(0x000);
COPY_80(0x080);
COPY_80(0x100);
COPY_80(0x180);
/* copy loop */
addik r6, r6, 0x200
addik r7, r7, -0x200
bneid r7, loop
addik r5, r5, 0x200
/* Restore register content */
lwi r19, r1, 4
lwi r20, r1, 8
lwi r21, r1, 12
lwi r22, r1, 16
lwi r23, r1, 20
lwi r24, r1, 24
lwi r25, r1, 28
addik r1, r1, 32
/* return back */
addik r3, r7, 0
rtsd r15, 8
nop
.align 4 /* Alignment is important to keep icache happy */
bu1: lbu r4,r6,r3
bu2: sb r4,r5,r3
addik r7,r7,-1
bneid r7,1b
bneid r7,bu1
addik r3,r3,1 /* delay slot */
3:
0:
addik r3,r7,0
rtsd r15,8
nop
.size __copy_tofrom_user, . - __copy_tofrom_user
.section __ex_table,"a"
.word 1b,3b,2b,3b,5b,3b,6b,3b
.word bu1, 0b;
.word bu2, 0b;
.text

View File

@ -0,0 +1,20 @@
#include <linux/module.h>
#include "libgcc.h"
/*
 * __ucmpdi2 - three-way comparison of two unsigned 64-bit integers
 * @a: left-hand operand
 * @b: right-hand operand
 *
 * The operands are split into 32-bit halves through DWunion and compared
 * as unsigned values, most significant half first.
 *
 * Returns 0 when a < b, 1 when a == b and 2 when a > b.
 */
word_type __ucmpdi2(unsigned long long a, unsigned long long b)
{
	const DWunion lhs = {.ll = a};
	const DWunion rhs = {.ll = b};
	const unsigned int lhs_high = (unsigned int) lhs.s.high;
	const unsigned int rhs_high = (unsigned int) rhs.s.high;
	const unsigned int lhs_low = (unsigned int) lhs.s.low;
	const unsigned int rhs_low = (unsigned int) rhs.s.low;

	/* The upper halves decide the result unless they are equal */
	if (lhs_high != rhs_high)
		return lhs_high < rhs_high ? 0 : 2;

	/* Upper halves match, so the lower halves break the tie */
	if (lhs_low != rhs_low)
		return lhs_low < rhs_low ? 0 : 2;

	return 1;
}
EXPORT_SYMBOL(__ucmpdi2);