diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 07c4318c0629..fd8f6bb5face 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -8,20 +8,22 @@ # for "archclean" and "archdep" for cleaning up and making dependencies for # this architecture -ifeq ($(CROSS_COMPILE),) # If building with TILERA_ROOT set (i.e. using the Tilera Multicore # Development Environment) we can set CROSS_COMPILE based on that. -ifdef TILERA_ROOT -CROSS_COMPILE = $(TILERA_ROOT)/bin/tile- -endif -endif - # If we're not cross-compiling, make sure we're on the right architecture. +# Only bother to test for a few common targets, to avoid useless errors. ifeq ($(CROSS_COMPILE),) -HOST_ARCH = $(shell uname -m) -ifneq ($(HOST_ARCH),$(ARCH)) + ifdef TILERA_ROOT + CROSS_COMPILE := $(TILERA_ROOT)/bin/tile- + else + goals := $(if $(MAKECMDGOALS), $(MAKECMDGOALS), all) + ifneq ($(strip $(filter vmlinux modules all,$(goals))),) + HOST_ARCH := $(shell uname -m) + ifneq ($(HOST_ARCH),$(ARCH)) $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) -endif + endif + endif + endif endif diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/arch/abi.h index da8df5b9d914..8affc76f771a 100644 --- a/arch/tile/include/arch/abi.h +++ b/arch/tile/include/arch/abi.h @@ -59,9 +59,7 @@ * The ABI requires callers to allocate a caller state save area of * this many bytes at the bottom of each stack frame. 
*/ -#ifdef __tile__ -#define C_ABI_SAVE_AREA_SIZE (2 * __SIZEOF_POINTER__) -#endif +#define C_ABI_SAVE_AREA_SIZE (2 * (CHIP_WORD_SIZE() / 8)) /** * The operand to an 'info' opcode directing the backtracer to not diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 40a5a3a876d9..ed359aee8837 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -255,43 +255,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) #define smp_mb__after_atomic_dec() do { } while (0) #define smp_mb__after_atomic_inc() do { } while (0) - -/* - * Support "tns" atomic integers. These are atomic integers that can - * hold any value but "1". They are more efficient than regular atomic - * operations because the "lock" (aka acquire) step is a single "tns" - * in the uncontended case, and the "unlock" (aka release) step is a - * single "store" without an mf. (However, note that on tilepro the - * "tns" will evict the local cache line, so it's not all upside.) - * - * Note that you can ONLY observe the value stored in the pointer - * using these operations; a direct read of the value may confusingly - * return the special value "1". - */ - -int __tns_atomic_acquire(atomic_t *); -void __tns_atomic_release(atomic_t *p, int v); - -static inline void tns_atomic_set(atomic_t *v, int i) -{ - __tns_atomic_acquire(v); - __tns_atomic_release(v, i); -} - -static inline int tns_atomic_cmpxchg(atomic_t *v, int o, int n) -{ - int ret = __tns_atomic_acquire(v); - __tns_atomic_release(v, (ret == o) ? 
n : ret); - return ret; -} - -static inline int tns_atomic_xchg(atomic_t *v, int n) -{ - int ret = __tns_atomic_acquire(v); - __tns_atomic_release(v, n); - return ret; -} - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h index 6970bfcad549..758ca4619d50 100644 --- a/arch/tile/include/asm/backtrace.h +++ b/arch/tile/include/asm/backtrace.h @@ -21,7 +21,9 @@ #include -#if CHIP_VA_WIDTH() > 32 +#if defined(__tile__) +typedef unsigned long VirtualAddress; +#elif CHIP_VA_WIDTH() > 32 typedef unsigned long long VirtualAddress; #else typedef unsigned int VirtualAddress; diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 84600f3514da..6832b4be8990 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -98,26 +98,27 @@ static inline int fls64(__u64 w) return (sizeof(__u64) * 8) - __builtin_clzll(w); } -static inline unsigned int hweight32(unsigned int w) +static inline unsigned int __arch_hweight32(unsigned int w) { return __builtin_popcount(w); } -static inline unsigned int hweight16(unsigned int w) +static inline unsigned int __arch_hweight16(unsigned int w) { return __builtin_popcount(w & 0xffff); } -static inline unsigned int hweight8(unsigned int w) +static inline unsigned int __arch_hweight8(unsigned int w) { return __builtin_popcount(w & 0xff); } -static inline unsigned long hweight64(__u64 w) +static inline unsigned long __arch_hweight64(__u64 w) { return __builtin_popcountll(w); } +#include #include #include #include diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index f6101840c9e7..08a2815b5e4e 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -27,11 +27,10 @@ #define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES) /* - * TILE-Gx is fully coherents so we don't need to define - * ARCH_KMALLOC_MINALIGN. 
+ * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN. */ #ifndef __tilegx__ -#define ARCH_KMALLOC_MINALIGN L2_CACHE_BYTES +#define ARCH_DMA_MINALIGN L2_CACHE_BYTES #endif /* use the cache line size for the L2, which is where it counts */ diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index efdd12e91020..d155db6fa9bd 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -60,7 +60,7 @@ void *kmap_fix_kpte(struct page *page, int finished); /* This macro is used only in map_new_virtual() to map "page". */ #define kmap_prot page_to_kpgprot(page) -void kunmap_atomic(void *kvaddr, enum km_type type); +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type); void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index f894a9016da6..7d90641cf18d 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -129,6 +129,11 @@ static inline u64 pmd_val(pmd_t pmd) #endif +static inline __attribute_const__ int get_order(unsigned long size) +{ + return BITS_PER_LONG - __builtin_clzl((size - 1) >> PAGE_SHIFT); +} + #endif /* !__ASSEMBLY__ */ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) @@ -332,7 +337,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #include -#include #endif /* __KERNEL__ */ diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h index c5604242c0d5..35d786fe93ae 100644 --- a/arch/tile/include/asm/scatterlist.h +++ b/arch/tile/include/asm/scatterlist.h @@ -1,22 +1 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_SCATTERLIST_H -#define _ASM_TILE_SCATTERLIST_H - -#define ISA_DMA_THRESHOLD (~0UL) - #include - -#endif /* _ASM_TILE_SCATTERLIST_H */ diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index 823ddd47ff6e..7caf0f36b030 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -15,6 +15,10 @@ #ifndef _ASM_TILE_SETUP_H #define _ASM_TILE_SETUP_H +#define COMMAND_LINE_SIZE 2048 + +#ifdef __KERNEL__ + #include #include @@ -23,10 +27,10 @@ */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define COMMAND_LINE_SIZE 2048 - void early_panic(const char *fmt, ...); void warn_early_printk(void); void __init disable_early_printk(void); +#endif /* __KERNEL__ */ + #endif /* _ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/siginfo.h index 0c12d1b9ddf2..56d661bb010b 100644 --- a/arch/tile/include/asm/siginfo.h +++ b/arch/tile/include/asm/siginfo.h @@ -17,6 +17,10 @@ #define __ARCH_SI_TRAPNO +#ifdef __LP64__ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +#endif + #include /* diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index ed17a80ec0ed..ef34d2caa5b1 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -389,14 +389,14 @@ static inline unsigned long __must_check copy_from_user(void *to, * Returns number of bytes that could not be copied. * On success, this will be zero. 
*/ -extern unsigned long __copy_in_user_asm( +extern unsigned long __copy_in_user_inatomic( void __user *to, const void __user *from, unsigned long n); static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_sleep(); - return __copy_in_user_asm(to, from, n); + return __copy_in_user_inatomic(to, from, n); } static inline unsigned long __must_check diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index 59b46dc53994..9bd303a141b2 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -532,11 +532,11 @@ void hv_disable_intr(HV_IntrMask disab_mask); */ void hv_clear_intr(HV_IntrMask clear_mask); -/** Assert a set of device interrupts. +/** Raise a set of device interrupts. * - * @param assert_mask Bitmap of interrupts to clear. + * @param raise_mask Bitmap of interrupts to raise. */ -void hv_assert_intr(HV_IntrMask assert_mask); +void hv_raise_intr(HV_IntrMask raise_mask); /** Trigger a one-shot interrupt on some tile * @@ -1712,7 +1712,7 @@ typedef struct * @param cache_control This argument allows you to specify a length of * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN). * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache. - * You can "or" in HV_FLUSH_EVICT_LI1 to flush the whole LII cache. + * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache. * HV_FLUSH_ALL flushes all caches. * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of * tile indices to perform cache flush on. 
The low bit of the first diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index 77265f3b58d6..d3c41c1ff6bd 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -19,9 +19,6 @@ #include -#if TILE_CHIP < 10 - - #include @@ -29,6 +26,27 @@ #define TREG_LR 55 +#if TILE_CHIP >= 10 +#define tile_bundle_bits tilegx_bundle_bits +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES +#define tile_decoded_instruction tilegx_decoded_instruction +#define tile_mnemonic tilegx_mnemonic +#define parse_insn_tile parse_insn_tilegx +#define TILE_OPC_IRET TILEGX_OPC_IRET +#define TILE_OPC_ADDI TILEGX_OPC_ADDI +#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI +#define TILE_OPC_INFO TILEGX_OPC_INFO +#define TILE_OPC_INFOL TILEGX_OPC_INFOL +#define TILE_OPC_JRP TILEGX_OPC_JRP +#define TILE_OPC_MOVE TILEGX_OPC_MOVE +#define OPCODE_STORE TILEGX_OPC_ST +typedef long long bt_int_reg_t; +#else +#define OPCODE_STORE TILE_OPC_SW +typedef int bt_int_reg_t; +#endif + /** A decoded bundle used for backtracer analysis. */ struct BacktraceBundle { tile_bundle_bits bits; @@ -41,7 +59,7 @@ struct BacktraceBundle { /* This implementation only makes sense for native tools. */ /** Default function to read memory. */ static bool bt_read_memory(void *result, VirtualAddress addr, - size_t size, void *extra) + unsigned int size, void *extra) { /* FIXME: this should do some horrible signal stuff to catch * SEGV cleanly and fail. 
@@ -106,6 +124,12 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2); if (insn == NULL) insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2); +#if TILE_CHIP >= 10 + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXI, vals, 2); +#endif if (insn == NULL) return false; @@ -190,13 +214,52 @@ static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle) return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL; } -/** Does this bundle contain the instruction 'sw sp, lr'? */ +/** Does this bundle contain a store of lr to sp? */ static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle) { static const int vals[2] = { TREG_SP, TREG_LR }; - return find_matching_insn(bundle, TILE_OPC_SW, vals, 2) != NULL; + return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL; } +#if TILE_CHIP >= 10 +/** Track moveli values placed into registers. */ +static inline void bt_update_moveli(const struct BacktraceBundle *bundle, + int moveli_args[]) +{ + int i; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) { + int reg = insn->operand_values[0]; + moveli_args[reg] = insn->operand_values[1]; + } + } +} + +/** Does this bundle contain an 'add sp, sp, reg' instruction + * from a register that we saw a moveli into, and if so, what + * is the value in the register? 
+ */ +static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust, + int moveli_args[]) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2); + if (insn) { + int reg = insn->operand_values[2]; + if (moveli_args[reg]) { + *adjust = moveli_args[reg]; + return true; + } + } + return false; +} +#endif + /** Locates the caller's PC and SP for a program starting at the * given address. */ @@ -227,6 +290,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, int next_bundle = 0; VirtualAddress pc; +#if TILE_CHIP >= 10 + /* Naively try to track moveli values to support addx for -m32. */ + int moveli_args[TILEGX_NUM_REGISTERS] = { 0 }; +#endif + /* Default to assuming that the caller's sp is the current sp. * This is necessary to handle the case where we start backtracing * right at the end of the epilog. @@ -380,7 +448,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, if (!sp_determined) { int adjust; - if (bt_has_addi_sp(&bundle, &adjust)) { + if (bt_has_addi_sp(&bundle, &adjust) +#if TILE_CHIP >= 10 + || bt_has_add_sp(&bundle, &adjust, moveli_args) +#endif + ) { location->sp_location = SP_LOC_OFFSET; if (adjust <= 0) { @@ -427,6 +499,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, sp_determined = true; } } + +#if TILE_CHIP >= 10 + /* Track moveli arguments for -m32 mode. */ + bt_update_moveli(&bundle, moveli_args); +#endif } if (bt_has_iret(&bundle)) { @@ -502,11 +579,10 @@ void backtrace_init(BacktraceIterator *state, break; } - /* The frame pointer should theoretically be aligned mod 8. If - * it's not even aligned mod 4 then something terrible happened - * and we should mark it as invalid. + /* If the frame pointer is not aligned to the basic word size + * something terrible happened and we should mark it as invalid. 
*/ - if (fp % 4 != 0) + if (fp % sizeof(bt_int_reg_t) != 0) fp = -1; /* -1 means "don't know initial_frame_caller_pc". */ @@ -547,9 +623,16 @@ void backtrace_init(BacktraceIterator *state, state->read_memory_func_extra = read_memory_func_extra; } +/* Handle the case where the register holds more bits than the VA. */ +static bool valid_addr_reg(bt_int_reg_t reg) +{ + return ((VirtualAddress)reg == reg); +} + bool backtrace_next(BacktraceIterator *state) { - VirtualAddress next_fp, next_pc, next_frame[2]; + VirtualAddress next_fp, next_pc; + bt_int_reg_t next_frame[2]; if (state->fp == -1) { /* No parent frame. */ @@ -563,11 +646,9 @@ bool backtrace_next(BacktraceIterator *state) } next_fp = next_frame[1]; - if (next_fp % 4 != 0) { - /* Caller's frame pointer is suspect, so give up. - * Technically it should be aligned mod 8, but we will - * be forgiving here. - */ + if (!valid_addr_reg(next_frame[1]) || + next_fp % sizeof(bt_int_reg_t) != 0) { + /* Caller's frame pointer is suspect, so give up. */ return false; } @@ -585,7 +666,7 @@ bool backtrace_next(BacktraceIterator *state) } else { /* Get the caller PC from the frame linkage area. */ next_pc = next_frame[0]; - if (next_pc == 0 || + if (!valid_addr_reg(next_frame[0]) || next_pc == 0 || next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { /* The PC is suspect, so give up. 
*/ return false; @@ -599,23 +680,3 @@ bool backtrace_next(BacktraceIterator *state) return true; } - -#else /* TILE_CHIP < 10 */ - -void backtrace_init(BacktraceIterator *state, - BacktraceMemoryReader read_memory_func, - void *read_memory_func_extra, - VirtualAddress pc, VirtualAddress lr, - VirtualAddress sp, VirtualAddress r52) -{ - state->pc = pc; - state->sp = sp; - state->fp = -1; - state->initial_frame_caller_pc = -1; - state->read_memory_func = read_memory_func; - state->read_memory_func_extra = read_memory_func_extra; -} - -bool backtrace_next(BacktraceIterator *state) { return false; } - -#endif /* TILE_CHIP < 10 */ diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index d5efb215dd5f..9c710db43f13 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -56,13 +56,15 @@ struct compat_ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; +#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int)) + struct compat_siginfo { int si_signo; int si_errno; int si_code; union { - int _pad[SI_PAD_SIZE]; + int _pad[COMPAT_SI_PAD_SIZE]; /* kill() */ struct { diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 3404c75f8e64..84f296ca9e63 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -952,7 +952,7 @@ STD_ENTRY(interrupt_return) * able to safely read all the remaining words on those cache * lines without waiting for the memory subsystem. 
*/ - pop_reg_zero r0, r1, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30) pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 @@ -1017,7 +1017,17 @@ STD_ENTRY(interrupt_return) { move r22, zero; move r23, zero } { move r24, zero; move r25, zero } { move r26, zero; move r27, zero } - { move r28, zero; move r29, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 1024 + sub r1, zero, r0 + } + slt_u r29, r1, r29 + { + mnz r1, r29, r1 + move r29, zero + } iret /* diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 92ef925d2f8d..2e02c41ddf3b 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4dd21c1e6d5e..e7d54c73d5c1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -953,7 +953,7 @@ static void __init load_hv_initrd(void) if (rc != stat.size) { pr_err("Error reading %d bytes from hvfs file '%s': %d\n", stat.size, initramfs_file, rc); - free_bootmem((unsigned long) initrd, stat.size); + free_initrd_mem((unsigned long) initrd, (unsigned long) initrd + stat.size); return; } initrd_start = (unsigned long) initrd; @@ -962,7 +962,7 @@ static void __init load_hv_initrd(void) void __init free_initrd_mem(unsigned long begin, unsigned long end) { - free_bootmem(begin, end - begin); + free_bootmem(__pa(begin), end - begin); } static void __init validate_hv(void) diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index b6268d3ae869..38a68b0b4581 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -108,7 +108,6 @@ static bool read_memory_func(void *result, VirtualAddress address, /* Return a pt_regs pointer
for a valid fault handler frame */ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) { -#ifndef __tilegx__ const char *fault = NULL; /* happy compiler */ char fault_buf[64]; VirtualAddress sp = kbt->it.sp; @@ -146,7 +145,6 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) } if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) return p; -#endif return NULL; } @@ -351,12 +349,6 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->task->pid, kbt->task->tgid, kbt->task->comm, smp_processor_id(), get_cycles()); } -#ifdef __tilegx__ - if (kbt->is_current) { - __insn_mtspr(SPR_SIM_CONTROL, - SIM_DUMP_SPR_ARG(SIM_DUMP_BACKTRACE)); - } -#endif kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index b9ab25a889b5..6bed820e1421 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -36,16 +36,6 @@ /* How many cycles per second we are running at. */ static cycles_t cycles_per_sec __write_once; -/* - * We set up shift and multiply values with a minsec of five seconds, - * since our timer counter counts down 31 bits at a frequency of - * no less than 500 MHz. See @minsec for clocks_calc_mult_shift(). - * We could use a different value for the 64-bit free-running - * cycle counter, but we use the same one for consistency, and since - * we will be reasonably precise with this value anyway. - */ -#define TILE_MINSEC 5 - cycles_t get_clock_rate(void) { return cycles_per_sec; @@ -68,6 +58,14 @@ cycles_t get_cycles(void) } #endif +/* + * We use a relatively small shift value so that sched_clock() + * won't wrap around very often. 
+ */ +#define SCHED_CLOCK_SHIFT 10 + +static unsigned long sched_clock_mult __write_once; + static cycles_t clocksource_get_cycles(struct clocksource *cs) { return get_cycles(); @@ -78,6 +76,7 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), + .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -88,8 +87,10 @@ static struct clocksource cycle_counter_cs = { void __init setup_clock(void) { cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); - clocksource_calc_mult_shift(&cycle_counter_cs, cycles_per_sec, - TILE_MINSEC); + sched_clock_mult = + clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); + cycle_counter_cs.mult = + clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -117,9 +118,14 @@ void __init time_init(void) * counter, plus bit 31, which signifies that the counter has wrapped * from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be * raised as long as bit 31 is set. + * + * The TILE_MINSEC value represents the largest range of real-time + * we can possibly cover with the timer, based on MAX_TICK combined + * with the slowest reasonable clock rate we might run at. 
*/ #define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */ +#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */ static int tile_timer_set_next_event(unsigned long ticks, struct clock_event_device *evt) @@ -211,8 +217,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) unsigned long long sched_clock(void) { return clocksource_cyc2ns(get_cycles(), - cycle_counter_cs.mult, - cycle_counter_cs.shift); + sched_clock_mult, SCHED_CLOCK_SHIFT); } int setup_profiling_timer(unsigned int multiplier) diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 3870abbeeaa2..0f362dc2c57f 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -128,7 +128,9 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) #ifdef __tilegx__ if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0) return 0; - if (get_Opcode_X1(bundle) != UNARY_OPCODE_X1) + if (get_Opcode_X1(bundle) != RRR_0_OPCODE_X1) + return 0; + if (get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1) return 0; if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1) return 0; diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 438af38bc9eb..746dc81ed3c4 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \ memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ strchr_$(BITS).o strlen_$(BITS).o -ifneq ($(CONFIG_TILEGX),y) +ifeq ($(CONFIG_TILEGX),y) +lib-y += memcpy_user_64.o +else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o endif diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 6bc7b52b4aa0..ce5dbf56578f 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm); EXPORT_SYMBOL(current_text_addr); EXPORT_SYMBOL(dump_stack); -/* arch/tile/lib/__memcpy.S */ -/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */ +/* arch/tile/lib/, 
various memcpy files */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__copy_to_user_inatomic); EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(__copy_from_user_zeroing); +#ifdef __tilegx__ +EXPORT_SYMBOL(__copy_in_user_inatomic); +#endif /* hypervisor glue */ #include EXPORT_SYMBOL(hv_dev_open); EXPORT_SYMBOL(hv_dev_pread); EXPORT_SYMBOL(hv_dev_pwrite); +EXPORT_SYMBOL(hv_dev_preada); +EXPORT_SYMBOL(hv_dev_pwritea); +EXPORT_SYMBOL(hv_dev_poll); +EXPORT_SYMBOL(hv_dev_poll_cancel); EXPORT_SYMBOL(hv_dev_close); +EXPORT_SYMBOL(hv_sysconf); +EXPORT_SYMBOL(hv_confstr); -/* -ltile-cc */ +/* libgcc.a */ uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); EXPORT_SYMBOL(__udivsi3); int32_t __divsi3(int32_t dividend, int32_t divisor); @@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3); #ifndef __tilegx__ uint64_t __ll_mul(uint64_t n0, uint64_t n1); EXPORT_SYMBOL(__ll_mul); -#endif -#ifndef __tilegx__ int64_t __muldi3(int64_t, int64_t); EXPORT_SYMBOL(__muldi3); uint64_t __lshrdi3(uint64_t, unsigned int); diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index f92984bf60ec..30c3b7ebb55d 100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S @@ -17,10 +17,6 @@ #include -#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64) -#define MEMCPY_USE_WH64 -#endif - #include @@ -160,7 +156,7 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } { addi r3, r1, 60; andi r9, r9, -64 } -#ifdef MEMCPY_USE_WH64 +#if CHIP_HAS_WH64() /* No need to prefetch dst, we'll just do the wh64 * right before we copy a line. */ @@ -173,7 +169,7 @@ EX: { lw r6, r3; addi r3, r3, 64 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } EX: { lw r7, r3; addi r3, r3, 64 } -#ifndef MEMCPY_USE_WH64 +#if !CHIP_HAS_WH64() /* Prefetch the dest */ /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } @@ -288,15 +284,7 @@ EX: { lw r7, r3; addi r3, r3, 64 } /* Fill second L1D line. 
*/ EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ -#ifdef MEMCPY_TEST_WH64 - /* Issue a fake wh64 that clobbers the destination words - * with random garbage, for testing. - */ - { movei r19, 64; crc32_32 r10, r2, r9 } -.Lwh64_test_loop: -EX: { sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 } - { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 } -#elif CHIP_HAS_WH64() +#if CHIP_HAS_WH64() /* Prepare destination line for writing. */ EX: { wh64 r9; addi r9, r9, 64 } #else @@ -340,7 +328,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ -#ifdef MEMCPY_USE_WH64 +#if CHIP_HAS_WH64() EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ #else /* Back up the r9 to a cache line we are already storing to diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index bfde5d864df1..d014c1fbcbc2 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c @@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n) */ __insn_prefetch(&out32[ahead32]); -#if 1 #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 #error "Unhandled CACHE_LINE_SIZE_IN_WORDS" #endif @@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n) *out32++ = v32; *out32++ = v32; } -#else - /* Unfortunately, due to a code generator flaw this - * allocates a separate register for each of these - * stores, which requires a large number of spills, - * which makes this procedure enormously bigger - * (something like 70%) - */ - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - n32 -= 16; -#endif /* To save compiled 
code size, reuse this loop even * when we run out of prefetching to do by dropping diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 0011f06b4fe2..704f3e8a4385 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -567,6 +567,14 @@ do_sigbus: * since that might indicate we have not yet squirreled the SPR * contents away and can thus safely take a recursive interrupt. * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2. + * + * Note that this routine is called before homecache_tlb_defer_enter(), + * which means that we can properly unlock any atomics that might + * be used there (good), but also means we must be very sensitive + * to not touch any data structures that might be located in memory + * that could migrate, as we could be entering the kernel on a dataplane + * cpu that has been deferring kernel TLB updates. This means, for + * example, that we can't migrate init_mm or its pgd. */ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, unsigned long address, diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index ff1cdff5114d..12ab137e7d4f 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -276,7 +276,7 @@ void *kmap_atomic(struct page *page, enum km_type type) } EXPORT_SYMBOL(kmap_atomic); -void kunmap_atomic(void *kvaddr, enum km_type type) +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -300,7 +300,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) arch_flush_lazy_mmu_mode(); pagefault_enable(); } -EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kunmap_atomic_notypecheck); /* * This API is supposed to allow us to map memory without a "struct page". 
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 97c478e7be27..fb3b4a55cec4 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -348,6 +349,7 @@ pte_t pte_set_home(pte_t pte, int home) return pte; } +EXPORT_SYMBOL(pte_set_home); /* * The routines in this section are the "static" versions of the normal @@ -403,6 +405,7 @@ struct page *homecache_alloc_pages(gfp_t gfp_mask, homecache_change_page_home(page, order, home); return page; } +EXPORT_SYMBOL(homecache_alloc_pages); struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order, int home) diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 28c23140c947..335c24621c41 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index e1898090f22c..b969770196c2 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -642,9 +642,13 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) __SYSCALL(__NR_wait4, sys_wait4) #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_fanotify_init 262 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 263 +__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #undef __NR_syscalls -#define __NR_syscalls 262 +#define __NR_syscalls 264 /* * All syscalls below here should go away really,