Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc: [POWERPC] Further fixes for the removal of 4level-fixup hack from ppc32 [POWERPC] EEH: log all PCI-X and PCI-E AER registers [POWERPC] EEH: capture and log pci state on error [POWERPC] EEH: Split up long error msg [POWERPC] EEH: log error only after driver notification. [POWERPC] fsl_soc: Make mac_addr const in fs_enet_of_init(). [POWERPC] Don't use SLAB/SLUB for PTE pages [POWERPC] Spufs support for 64K LS mappings on 4K kernels [POWERPC] Add ability to 4K kernel to hash in 64K pages [POWERPC] Introduce address space "slices" [POWERPC] Small fixes & cleanups in segment page size demotion [POWERPC] iSeries: Make HVC_ISERIES the default [POWERPC] iSeries: suppress build warning in lparmap.c [POWERPC] Mark pages that don't exist as nosave [POWERPC] swsusp: Introduce register_nosave_region_late
2007-05-09 12:56:01 -07:00 · 2007-05-09 12:56:01 -07:00 · aabded9c3a
parent 9a9136e270 f1a1eb299a
commit aabded9c3a
38 changed files with 1321 additions and 787 deletions
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@ -120,19 +120,6 @@ config GENERIC_BUG
 config SYS_SUPPORTS_APM_EMULATION
 	bool

-#
-# Powerpc uses the slab allocator to manage its ptes and the
-# page structs of ptes are used for splitting the page table
-# lock for configurations supporting more than SPLIT_PTLOCK_CPUS.
-#
-# In that special configuration the page structs of slabs are modified.
-# This setting disables the selection of SLUB as a slab allocator.
-#
-config ARCH_USES_SLAB_PAGE_STRUCT
-	bool
-	default y
-	depends on SPLIT_PTLOCK_CPUS <= NR_CPUS
-
 config DEFAULT_UIMAGE
 	bool
 	help
@ -352,6 +339,11 @@ config PPC_STD_MMU_32
 	def_bool y
 	depends on PPC_STD_MMU && PPC32

+config PPC_MM_SLICES
+	bool
+	default y if HUGETLB_PAGE
+	default n
+
 config VIRT_CPU_ACCOUNTING
 	bool "Deterministic task and CPU time accounting"
 	depends on PPC64
@ -541,9 +533,15 @@ config NODES_SPAN_OTHER_NODES
 	def_bool y
 	depends on NEED_MULTIPLE_NODES

+config PPC_HAS_HASH_64K
+	bool
+	depends on PPC64
+	default n
+
 config PPC_64K_PAGES
 	bool "64k page size"
 	depends on PPC64
+	select PPC_HAS_HASH_64K
 	help
 	  This option changes the kernel logical page size to 64k. On machines
 	  without processor support for 64k pages, the kernel will simulate
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@ -122,12 +122,18 @@ int main(void)
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
-	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
 	DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
-#ifdef CONFIG_HUGETLB_PAGE
-	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
-	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
-#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_PPC_MM_SLICES
+	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
+					    context.low_slices_psize));
+	DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
+					    context.high_slices_psize));
+	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+	DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
+#else
+	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+
+#endif /* CONFIG_PPC_MM_SLICES */
 	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
 	DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
 	DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@ -10,7 +10,8 @@
 #include <asm/pgtable.h>
 #include <asm/iseries/lpar_map.h>

-const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
+/* The # is to stop gcc trying to make .text nonexecutable */
+const struct LparMap __attribute__((__section__(".text #"))) xLparMap = {
 	.xNumberEsids = HvEsidsToMap,
 	.xNumberRanges = HvRangesToMap,
 	.xSegmentTableOffs = STAB0_PAGE,
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@ -18,4 +18,5 @@ obj-$(CONFIG_40x)		+= 4xx_mmu.o
 obj-$(CONFIG_44x)		+= 44x_mmu.o
 obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@ -615,6 +615,9 @@ htab_pte_insert_failure:
 	li	r3,-1
 	b	htab_bail

+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K

 /*****************************************************************************
 *                                                                           *
@ -870,7 +873,7 @@ ht64_pte_insert_failure:
 	b	ht64_bail


-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_HAS_HASH_64K */


 /*****************************************************************************
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@ -51,6 +51,7 @@
 #include <asm/cputable.h>
 #include <asm/abs_addr.h>
 #include <asm/sections.h>
+#include <asm/spu.h>

 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@ -419,7 +420,7 @@ static void __init htab_finish_init(void)
 	extern unsigned int *htab_call_hpte_remove;
 	extern unsigned int *htab_call_hpte_updatepp;

-#ifdef CONFIG_PPC_64K_PAGES
+#ifdef CONFIG_PPC_HAS_HASH_64K
 	extern unsigned int *ht64_call_hpte_insert1;
 	extern unsigned int *ht64_call_hpte_insert2;
 	extern unsigned int *ht64_call_hpte_remove;
@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 * Demote a segment to using 4k pages.
 * For now this makes the whole process use 4k pages.
 */
-void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
-{
 #ifdef CONFIG_PPC_64K_PAGES
+static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
 	if (mm->context.user_psize == MMU_PAGE_4K)
 		return;
+#ifdef CONFIG_PPC_MM_SLICES
+	slice_set_user_psize(mm, MMU_PAGE_4K);
+#else /* CONFIG_PPC_MM_SLICES */
 	mm->context.user_psize = MMU_PAGE_4K;
 	mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
-	get_paca()->context = mm->context;
-	slb_flush_and_rebolt();
+#endif /* CONFIG_PPC_MM_SLICES */
+
 #ifdef CONFIG_SPE_BASE
 	spu_flush_all_slbs(mm);
 #endif
-#endif
 }
-
-EXPORT_SYMBOL_GPL(demote_segment_4k);
+#endif /* CONFIG_PPC_64K_PAGES */

 /* Result code is:
 *  0 - handled
@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 			return 1;
 		}
 		vsid = get_vsid(mm->context.id, ea);
+#ifdef CONFIG_PPC_MM_SLICES
+		psize = get_slice_psize(mm, ea);
+#else
 		psize = mm->context.user_psize;
+#endif
 		break;
 	case VMALLOC_REGION_ID:
 		mm = &init_mm;
@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
 		local = 1;

+#ifdef CONFIG_HUGETLB_PAGE
 	/* Handle hugepage regions */
-	if (unlikely(in_hugepage_area(mm->context, ea))) {
+	if (HPAGE_SHIFT && psize == mmu_huge_psize) {
 		DBG_LOW(" -> huge page !\n");
 		return hash_huge_page(mm, access, ea, vsid, local, trap);
 	}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifndef CONFIG_PPC_64K_PAGES
+	/* If we use 4K pages and our psize is not 4K, then we are hitting
+	 * a special driver mapping, we need to align the address before
+	 * we fetch the PTE
+	 */
+	if (psize != MMU_PAGE_4K)
+		ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+#endif /* CONFIG_PPC_64K_PAGES */

 	/* Get PTE and page size from page tables */
 	ptep = find_linux_pte(pgdir, ea);
@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	}

 	/* Do actual hashing */
-#ifndef CONFIG_PPC_64K_PAGES
-	rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
+#ifdef CONFIG_PPC_64K_PAGES
 	/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
 	if (pte_val(*ptep) & _PAGE_4K_PFN) {
 		demote_segment_4k(mm, ea);
 		psize = MMU_PAGE_4K;
 	}

-	if (mmu_ci_restrictions) {
-		/* If this PTE is non-cacheable, switch to 4k */
-		if (psize == MMU_PAGE_64K &&
-		    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
-			if (user_region) {
-				demote_segment_4k(mm, ea);
-				psize = MMU_PAGE_4K;
-			} else if (ea < VMALLOC_END) {
-				/*
-				 * some driver did a non-cacheable mapping
-				 * in vmalloc space, so switch vmalloc
-				 * to 4k pages
-				 */
-				printk(KERN_ALERT "Reducing vmalloc segment "
-				       "to 4kB pages because of "
-				       "non-cacheable mapping\n");
-				psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-			}
+	/* If this PTE is non-cacheable and we have restrictions on
+	 * using non cacheable large pages, then we switch to 4k
+	 */
+	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
+	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+		if (user_region) {
+			demote_segment_4k(mm, ea);
+			psize = MMU_PAGE_4K;
+		} else if (ea < VMALLOC_END) {
+			/*
+			 * some driver did a non-cacheable mapping
+			 * in vmalloc space, so switch vmalloc
+			 * to 4k pages
+			 */
+			printk(KERN_ALERT "Reducing vmalloc segment "
+			       "to 4kB pages because of "
+			       "non-cacheable mapping\n");
+			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
 #ifdef CONFIG_SPE_BASE
 			spu_flush_all_slbs(mm);
 #endif
 		}
-		if (user_region) {
-			if (psize != get_paca()->context.user_psize) {
-				get_paca()->context = mm->context;
-				slb_flush_and_rebolt();
-			}
-		} else if (get_paca()->vmalloc_sllp !=
-			   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
-			get_paca()->vmalloc_sllp =
-				mmu_psize_defs[mmu_vmalloc_psize].sllp;
+	}
+	if (user_region) {
+		if (psize != get_paca()->context.user_psize) {
+			get_paca()->context.user_psize =
+				mm->context.user_psize;
 			slb_flush_and_rebolt();
 		}
+	} else if (get_paca()->vmalloc_sllp !=
+		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+		get_paca()->vmalloc_sllp =
+			mmu_psize_defs[mmu_vmalloc_psize].sllp;
+		slb_flush_and_rebolt();
 	}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K
 	if (psize == MMU_PAGE_64K)
 		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
 	else
+#endif /* CONFIG_PPC_HAS_HASH_64K */
 		rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#endif /* CONFIG_PPC_64K_PAGES */

 #ifndef CONFIG_PPC_64K_PAGES
 	DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	unsigned long flags;
 	int local = 0;

-	/* We don't want huge pages prefaulted for now
-	 */
-	if (unlikely(in_hugepage_area(mm->context, ea)))
+	BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+
+#ifdef CONFIG_PPC_MM_SLICES
+	/* We only prefault standard pages for now */
+	if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize));
 		return;
+#endif

 	DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
 		" trap=%lx\n", mm, mm->pgd, ea, access, trap);

-	/* Get PTE, VSID, access mask */
+	/* Get Linux PTE if available */
 	pgdir = mm->pgd;
 	if (pgdir == NULL)
 		return;
 	ptep = find_linux_pte(pgdir, ea);
 	if (!ptep)
 		return;
+
+#ifdef CONFIG_PPC_64K_PAGES
+	/* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
+	 * a 64K kernel), then we don't preload, hash_page() will take
+	 * care of it once we actually try to access the page.
+	 * That way we don't have to duplicate all of the logic for segment
+	 * page size demotion here
+	 */
+	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+		return;
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	/* Get VSID */
 	vsid = get_vsid(mm->context.id, ea);

-	/* Hash it in */
+	/* Hash doesn't like irqs */
 	local_irq_save(flags);
+
+	/* Is that local to this CPU ? */
 	mask = cpumask_of_cpu(smp_processor_id());
 	if (cpus_equal(mm->cpu_vm_mask, mask))
 		local = 1;
-#ifndef CONFIG_PPC_64K_PAGES
-	__hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
-	if (mmu_ci_restrictions) {
-		/* If this PTE is non-cacheable, switch to 4k */
-		if (mm->context.user_psize == MMU_PAGE_64K &&
-		    (pte_val(*ptep) & _PAGE_NO_CACHE))
-			demote_segment_4k(mm, ea);
-	}
+
+	/* Hash it in */
+#ifdef CONFIG_PPC_HAS_HASH_64K
 	if (mm->context.user_psize == MMU_PAGE_64K)
 		__hash_page_64K(ea, access, vsid, ptep, trap, local);
 	else
-		__hash_page_4K(ea, access, vsid, ptep, trap, local);
 #endif /* CONFIG_PPC_64K_PAGES */
+		__hash_page_4K(ea, access, vsid, ptep, trap, local);
+
 	local_irq_restore(flags);
 }

--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@ -91,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pgd_t *pg;
 	pud_t *pu;

-	BUG_ON(! in_hugepage_area(mm->context, addr));
+	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);

 	addr &= HPAGE_MASK;

@ -119,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	pud_t *pu;
 	hugepd_t *hpdp = NULL;

-	BUG_ON(! in_hugepage_area(mm->context, addr));
+	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);

 	addr &= HPAGE_MASK;

@ -302,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 	start = addr;
 	pgd = pgd_offset((*tlb)->mm, addr);
 	do {
-		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+		BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
@ -331,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return __pte(old);
 }

-struct slb_flush_info {
-	struct mm_struct *mm;
-	u16 newareas;
-};
-
-static void flush_low_segments(void *parm)
-{
-	struct slb_flush_info *fi = parm;
-	unsigned long i;
-
-	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
-
-	if (current->active_mm != fi->mm)
-		return;
-
-	/* Only need to do anything if this CPU is working in the same
-	 * mm as the one which has changed */
-
-	/* update the paca copy of the context struct */
-	get_paca()->context = current->active_mm->context;
-
-	asm volatile("isync" : : : "memory");
-	for (i = 0; i < NUM_LOW_AREAS; i++) {
-		if (! (fi->newareas & (1U << i)))
-			continue;
-		asm volatile("slbie %0"
-			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
-	}
-	asm volatile("isync" : : : "memory");
-}
-
-static void flush_high_segments(void *parm)
-{
-	struct slb_flush_info *fi = parm;
-	unsigned long i, j;
-
-
-	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
-
-	if (current->active_mm != fi->mm)
-		return;
-
-	/* Only need to do anything if this CPU is working in the same
-	 * mm as the one which has changed */
-
-	/* update the paca copy of the context struct */
-	get_paca()->context = current->active_mm->context;
-
-	asm volatile("isync" : : : "memory");
-	for (i = 0; i < NUM_HIGH_AREAS; i++) {
-		if (! (fi->newareas & (1U << i)))
-			continue;
-		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
-			asm volatile("slbie %0"
-				     :: "r" (((i << HTLB_AREA_SHIFT)
-					      + (j << SID_SHIFT)) | SLBIE_C));
-	}
-	asm volatile("isync" : : : "memory");
-}
-
-static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
-	unsigned long start = area << SID_SHIFT;
-	unsigned long end = (area+1) << SID_SHIFT;
-	struct vm_area_struct *vma;
-
-	BUG_ON(area >= NUM_LOW_AREAS);
-
-	/* Check no VMAs are in the region */
-	vma = find_vma(mm, start);
-	if (vma && (vma->vm_start < end))
-		return -EBUSY;
-
-	return 0;
-}
-
-static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
-	unsigned long start = area << HTLB_AREA_SHIFT;
-	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
-	struct vm_area_struct *vma;
-
-	BUG_ON(area >= NUM_HIGH_AREAS);
-
-	/* Hack, so that each addresses is controlled by exactly one
-	 * of the high or low area bitmaps, the first high area starts
-	 * at 4GB, not 0 */
-	if (start == 0)
-		start = 0x100000000UL;
-
-	/* Check no VMAs are in the region */
-	vma = find_vma(mm, start);
-	if (vma && (vma->vm_start < end))
-		return -EBUSY;
-
-	return 0;
-}
-
-static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
-	unsigned long i;
-	struct slb_flush_info fi;
-
-	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
-	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
-
-	newareas &= ~(mm->context.low_htlb_areas);
-	if (! newareas)
-		return 0; /* The segments we want are already open */
-
-	for (i = 0; i < NUM_LOW_AREAS; i++)
-		if ((1 << i) & newareas)
-			if (prepare_low_area_for_htlb(mm, i) != 0)
-				return -EBUSY;
-
-	mm->context.low_htlb_areas |= newareas;
-
-	/* the context change must make it to memory before the flush,
-	 * so that further SLB misses do the right thing. */
-	mb();
-
-	fi.mm = mm;
-	fi.newareas = newareas;
-	on_each_cpu(flush_low_segments, &fi, 0, 1);
-
-	return 0;
-}
-
-static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
-	struct slb_flush_info fi;
-	unsigned long i;
-
-	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
-	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
-		     != NUM_HIGH_AREAS);
-
-	newareas &= ~(mm->context.high_htlb_areas);
-	if (! newareas)
-		return 0; /* The areas we want are already open */
-
-	for (i = 0; i < NUM_HIGH_AREAS; i++)
-		if ((1 << i) & newareas)
-			if (prepare_high_area_for_htlb(mm, i) != 0)
-				return -EBUSY;
-
-	mm->context.high_htlb_areas |= newareas;
-
-	/* the context change must make it to memory before the flush,
-	 * so that further SLB misses do the right thing. */
-	mb();
-
-	fi.mm = mm;
-	fi.newareas = newareas;
-	on_each_cpu(flush_high_segments, &fi, 0, 1);
-
-	return 0;
-}
-
-int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
-{
-	int err = 0;
-
-	if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
-		return -EINVAL;
-	if (len & ~HPAGE_MASK)
-		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
-		return -EINVAL;
-
-	if (addr < 0x100000000UL)
-		err = open_low_hpage_areas(current->mm,
-					  LOW_ESID_MASK(addr, len));
-	if ((addr + len) > 0x100000000UL)
-		err = open_high_hpage_areas(current->mm,
-					    HTLB_AREA_MASK(addr, len));
-#ifdef CONFIG_SPE_BASE
-	spu_flush_all_slbs(current->mm);
-#endif
-	if (err) {
-		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
-		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
-		       addr, len,
-		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
-		return err;
-	}
-
-	return 0;
-}
-
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;

-	if (! in_hugepage_area(mm->context, address))
+	if (get_slice_psize(mm, address) != mmu_huge_psize)
 		return ERR_PTR(-EINVAL);

 	ptep = huge_pte_offset(mm, address);
@ -551,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }

-/* Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions. */
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-				     unsigned long len, unsigned long pgoff,
-				     unsigned long flags)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long start_addr;
-
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	/* handle fixed mapping: prevent overlap with huge pages */
-	if (flags & MAP_FIXED) {
-		if (is_hugepage_only_range(mm, addr, len))
-			return -EINVAL;
-		return addr;
-	}
-
-	if (addr) {
-		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
-		if (((TASK_SIZE - len) >= addr)
-		    && (!vma || (addr+len) <= vma->vm_start)
-		    && !is_hugepage_only_range(mm, addr,len))
-			return addr;
-	}
-	if (len > mm->cached_hole_size) {
-	        start_addr = addr = mm->free_area_cache;
-	} else {
-	        start_addr = addr = TASK_UNMAPPED_BASE;
-	        mm->cached_hole_size = 0;
-	}
-
-full_search:
-	vma = find_vma(mm, addr);
-	while (TASK_SIZE - len >= addr) {
-		BUG_ON(vma && (addr >= vma->vm_end));
-
-		if (touches_hugepage_low_range(mm, addr, len)) {
-			addr = ALIGN(addr+1, 1<<SID_SHIFT);
-			vma = find_vma(mm, addr);
-			continue;
-		}
-		if (touches_hugepage_high_range(mm, addr, len)) {
-			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
-			vma = find_vma(mm, addr);
-			continue;
-		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
-		addr = vma->vm_end;
-		vma = vma->vm_next;
-	}
-
-	/* Make sure we didn't miss any holes */
-	if (start_addr != TASK_UNMAPPED_BASE) {
-		start_addr = addr = TASK_UNMAPPED_BASE;
-		mm->cached_hole_size = 0;
-		goto full_search;
-	}
-	return -ENOMEM;
-}
-
-/*
- * This mmap-allocator allocates new areas top-down from below the
- * stack's low limit (the base):
- *
- * Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions.
- */
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
-			  const unsigned long len, const unsigned long pgoff,
-			  const unsigned long flags)
-{
-	struct vm_area_struct *vma, *prev_vma;
-	struct mm_struct *mm = current->mm;
-	unsigned long base = mm->mmap_base, addr = addr0;
-	unsigned long largest_hole = mm->cached_hole_size;
-	int first_time = 1;
-
-	/* requested length too big for entire address space */
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	/* handle fixed mapping: prevent overlap with huge pages */
-	if (flags & MAP_FIXED) {
-		if (is_hugepage_only_range(mm, addr, len))
-			return -EINVAL;
-		return addr;
-	}
-
-	/* dont allow allocations above current base */
-	if (mm->free_area_cache > base)
-		mm->free_area_cache = base;
-
-	/* requesting a specific address */
-	if (addr) {
-		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start)
-				&& !is_hugepage_only_range(mm, addr,len))
-			return addr;
-	}
-
-	if (len <= largest_hole) {
-	        largest_hole = 0;
-		mm->free_area_cache = base;
-	}
-try_again:
-	/* make sure it can fit in the remaining address space */
-	if (mm->free_area_cache < len)
-		goto fail;
-
-	/* either no address requested or cant fit in requested address hole */
-	addr = (mm->free_area_cache - len) & PAGE_MASK;
-	do {
-hugepage_recheck:
-		if (touches_hugepage_low_range(mm, addr, len)) {
-			addr = (addr & ((~0) << SID_SHIFT)) - len;
-			goto hugepage_recheck;
-		} else if (touches_hugepage_high_range(mm, addr, len)) {
-			addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
-			goto hugepage_recheck;
-		}
-
-		/*
-		 * Lookup failure means no vma is above this address,
-		 * i.e. return with success:
-		 */
- 	 	if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
-			return addr;
-
-		/*
-		 * new region fits between prev_vma->vm_end and
-		 * vma->vm_start, use it:
-		 */
-		if (addr+len <= vma->vm_start &&
-		          (!prev_vma || (addr >= prev_vma->vm_end))) {
-			/* remember the address as a hint for next time */
-		        mm->cached_hole_size = largest_hole;
-		        return (mm->free_area_cache = addr);
-		} else {
-			/* pull free_area_cache down to the first hole */
-		        if (mm->free_area_cache == vma->vm_end) {
-				mm->free_area_cache = vma->vm_start;
-				mm->cached_hole_size = largest_hole;
-			}
-		}
-
-		/* remember the largest hole we saw so far */
-		if (addr + largest_hole < vma->vm_start)
-		        largest_hole = vma->vm_start - addr;
-
-		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
-	} while (len <= vma->vm_start);
-
-fail:
-	/*
-	 * if hint left us with no space for the requested
-	 * mapping then try again:
-	 */
-	if (first_time) {
-		mm->free_area_cache = base;
-		largest_hole = 0;
-		first_time = 0;
-		goto try_again;
-	}
-	/*
-	 * A failed mmap() very likely causes application failure,
-	 * so fall back to the bottom-up function here. This scenario
-	 * can happen with large stack limits and large mmap()
-	 * allocations.
-	 */
-	mm->free_area_cache = TASK_UNMAPPED_BASE;
-	mm->cached_hole_size = ~0UL;
-	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-	/*
-	 * Restore the topdown base:
-	 */
-	mm->free_area_cache = base;
-	mm->cached_hole_size = ~0UL;
-
-	return addr;
-}
-
-static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
-{
-	struct vm_area_struct *vma;
-
-	vma = find_vma(current->mm, addr);
-	if (TASK_SIZE - len >= addr &&
-	    (!vma || ((addr + len) <= vma->vm_start)))
-		return 0;
-
-	return -ENOMEM;
-}
-
-static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
-{
-	unsigned long addr = 0;
-	struct vm_area_struct *vma;
-
-	vma = find_vma(current->mm, addr);
-	while (addr + len <= 0x100000000UL) {
-		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
-		if (! __within_hugepage_low_range(addr, len, segmask)) {
-			addr = ALIGN(addr+1, 1<<SID_SHIFT);
-			vma = find_vma(current->mm, addr);
-			continue;
-		}
-
-		if (!vma || (addr + len) <= vma->vm_start)
-			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Depending on segmask this might not be a confirmed
-		 * hugepage region, so the ALIGN could have skipped
-		 * some VMAs */
-		vma = find_vma(current->mm, addr);
-	}
-
-	return -ENOMEM;
-}
-
-static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
-{
-	unsigned long addr = 0x100000000UL;
-	struct vm_area_struct *vma;
-
-	vma = find_vma(current->mm, addr);
-	while (addr + len <= TASK_SIZE_USER64) {
-		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
-		if (! __within_hugepage_high_range(addr, len, areamask)) {
-			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
-			vma = find_vma(current->mm, addr);
-			continue;
-		}
-
-		if (!vma || (addr + len) <= vma->vm_start)
-			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Depending on segmask this might not be a confirmed
-		 * hugepage region, so the ALIGN could have skipped
-		 * some VMAs */
-		vma = find_vma(current->mm, addr);
-	}
-
-	return -ENOMEM;
-}

 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
-	int lastshift;
-	u16 areamask, curareas;
-
-	if (HPAGE_SHIFT == 0)
-		return -EINVAL;
-	if (len & ~HPAGE_MASK)
-		return -EINVAL;
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
-		return -EINVAL;
-
-	/* Paranoia, caller should have dealt with this */
-	BUG_ON((addr + len)  < addr);
-
-	/* Handle MAP_FIXED */
-	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(addr, len, pgoff))
-			return -EINVAL;
-		return addr;
-	}
-
-	if (test_thread_flag(TIF_32BIT)) {
-		curareas = current->mm->context.low_htlb_areas;
-
-		/* First see if we can use the hint address */
-		if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
-			areamask = LOW_ESID_MASK(addr, len);
-			if (open_low_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-
-		/* Next see if we can map in the existing low areas */
-		addr = htlb_get_low_area(len, curareas);
-		if (addr != -ENOMEM)
-			return addr;
-
-		/* Finally go looking for areas to open */
-		lastshift = 0;
-		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
-		     ! lastshift; areamask >>=1) {
-			if (areamask & 1)
-				lastshift = 1;
-
-			addr = htlb_get_low_area(len, curareas | areamask);
-			if ((addr != -ENOMEM)
-			    && open_low_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-	} else {
-		curareas = current->mm->context.high_htlb_areas;
-
-		/* First see if we can use the hint address */
-		/* We discourage 64-bit processes from doing hugepage
-		 * mappings below 4GB (must use MAP_FIXED) */
-		if ((addr >= 0x100000000UL)
-		    && (htlb_check_hinted_area(addr, len) == 0)) {
-			areamask = HTLB_AREA_MASK(addr, len);
-			if (open_high_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-
-		/* Next see if we can map in the existing high areas */
-		addr = htlb_get_high_area(len, curareas);
-		if (addr != -ENOMEM)
-			return addr;
-
-		/* Finally go looking for areas to open */
-		lastshift = 0;
-		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
-		     ! lastshift; areamask >>=1) {
-			if (areamask & 1)
-				lastshift = 1;
-
-			addr = htlb_get_high_area(len, curareas | areamask);
-			if ((addr != -ENOMEM)
-			    && open_high_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-	}
-	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-	       " enough areas\n");
-	return -ENOMEM;
+	return slice_get_unmapped_area(addr, len, flags,
+				       mmu_huge_psize, 1, 0);
 }

 /*
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
 	memset(addr, 0, kmem_cache_size(cache));
 }

-#ifdef CONFIG_PPC_64K_PAGES
-static const unsigned int pgtable_cache_size[3] = {
-	PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
-	"pte_pmd_cache", "pmd_cache", "pgd_cache",
-};
-#else
 static const unsigned int pgtable_cache_size[2] = {
-	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+	PGD_TABLE_SIZE, PMD_TABLE_SIZE
 };
 static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
-	"pgd_pte_cache", "pud_pmd_cache",
-};
+#ifdef CONFIG_PPC_64K_PAGES
+	"pgd_cache", "pmd_cache",
+#else
+	"pgd_cache", "pud_pmd_cache",
 #endif /* CONFIG_PPC_64K_PAGES */
+};

 #ifdef CONFIG_HUGETLB_PAGE
 /* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@ -31,6 +31,7 @@
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
+#include <linux/suspend.h>

 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@ -276,6 +277,28 @@ void __init do_init_bootmem(void)
 	init_bootmem_done = 1;
 }

+/* mark pages that don't exist as nosave */
+static int __init mark_nonram_nosave(void)
+{
+	unsigned long lmb_next_region_start_pfn,
+		      lmb_region_max_pfn;
+	int i;
+
+	for (i = 0; i < lmb.memory.cnt - 1; i++) {
+		lmb_region_max_pfn =
+			(lmb.memory.region[i].base >> PAGE_SHIFT) +
+			(lmb.memory.region[i].size >> PAGE_SHIFT);
+		lmb_next_region_start_pfn =
+			lmb.memory.region[i+1].base >> PAGE_SHIFT;
+
+		if (lmb_region_max_pfn < lmb_next_region_start_pfn)
+			register_nosave_region(lmb_region_max_pfn,
+					       lmb_next_region_start_pfn);
+	}
+
+	return 0;
+}
+
 /*
 * paging_init() sets up the page tables - in fact we've already done this.
 */
@ -307,6 +330,8 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
 #endif
 	free_area_init_nodes(max_zone_pfns);
+
+	mark_nonram_nosave();
 }
 #endif /* ! CONFIG_NEED_MULTIPLE_NODES */

--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_64.c
@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int index;
 	int err;
+	int new_context = (mm->context.id == 0);

 again:
 	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
@ -50,9 +51,18 @@ again:
 	}

 	mm->context.id = index;
+#ifdef CONFIG_PPC_MM_SLICES
+	/* The old code would re-promote on fork, we don't do that
+	 * when using slices as it could cause problem promoting slices
+	 * that have been forced down to 4K
+	 */
+	if (new_context)
+		slice_set_user_psize(mm, mmu_virtual_psize);
+#else
 	mm->context.user_psize = mmu_virtual_psize;
 	mm->context.sllp = SLB_VSID_USER |
 		mmu_psize_defs[mmu_virtual_psize].sllp;
+#endif

 	return 0;
 }
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,

 	if (Hash == 0)
 		return;
-	pmd = pmd_offset(pgd_offset(mm, ea), ea);
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
 	if (!pmd_none(*pmd))
 		add_hash_page(mm->context.id, ea, pmd_val(*pmd));
 }
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@ -198,12 +198,6 @@ void slb_initialize(void)
 	static int slb_encoding_inited;
 	extern unsigned int *slb_miss_kernel_load_linear;
 	extern unsigned int *slb_miss_kernel_load_io;
-#ifdef CONFIG_HUGETLB_PAGE
-	extern unsigned int *slb_miss_user_load_huge;
-	unsigned long huge_llp;
-
-	huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
-#endif

 	/* Prepare our SLB miss handler based on our page size */
 	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
@ -220,11 +214,6 @@ void slb_initialize(void)

 		DBG("SLB: linear  LLP = %04x\n", linear_llp);
 		DBG("SLB: io      LLP = %04x\n", io_llp);
-#ifdef CONFIG_HUGETLB_PAGE
-		patch_slb_encoding(slb_miss_user_load_huge,
-				   SLB_VSID_USER | huge_llp);
-		DBG("SLB: huge    LLP = %04x\n", huge_llp);
-#endif
 	}

 	get_paca()->stab_rr = SLB_NUM_BOLTED;
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io)
 	srdi.	r9,r10,USER_ESID_BITS
 	bne-	8f			/* invalid ea bits set */

-	/* Figure out if the segment contains huge pages */
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
-	b	1f
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+
+	/* when using slices, we extract the psize off the slice bitmaps
+	 * and then we need to get the sllp encoding off the mmu_psize_defs
+	 * array.
+	 *
+	 * XXX This is a bit inefficient especially for the normal case,
+	 * so we should try to implement a fast path for the standard page
+	 * size using the old sllp value so we avoid the array. We cannot
+	 * really do dynamic patching unfortunately as processes might flip
+	 * between 4k and 64k standard page size
+	 */
+#ifdef CONFIG_PPC_MM_SLICES
 	cmpldi	r10,16

-	lhz	r9,PACALOWHTLBAREAS(r13)
-	mr	r11,r10
+	/* Get the slice index * 4 in r11 and matching slice size mask in r9 */
+	ld	r9,PACALOWSLICESPSIZE(r13)
+	sldi	r11,r10,2
 	blt	5f
+	ld	r9,PACAHIGHSLICEPSIZE(r13)
+	srdi	r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
+	andi.	r11,r11,0x3c

-	lhz	r9,PACAHIGHHTLBAREAS(r13)
-	srdi	r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
-
-5:	srd	r9,r9,r11
-	andi.	r9,r9,1
-	beq	1f
-_GLOBAL(slb_miss_user_load_huge)
-	li	r11,0
-	b	2f
-1:
-#endif /* CONFIG_HUGETLB_PAGE */
+5:	/* Extract the psize and multiply to get an array offset */
+	srd	r9,r9,r11
+	andi.	r9,r9,0xf
+	mulli	r9,r9,MMUPSIZEDEFSIZE

+	/* Now get to the array and obtain the sllp
+	 */
+	ld	r11,PACATOC(r13)
+	ld	r11,mmu_psize_defs@got(r11)
+	add	r11,r11,r9
+	ld	r11,MMUPSIZESLLP(r11)
+	ori	r11,r11,SLB_VSID_USER
+#else
+	/* paca context sllp already contains the SLB_VSID_USER bits */
 	lhz	r11,PACACONTEXTSLLP(r13)
-2:
+#endif /* CONFIG_PPC_MM_SLICES */
+
 	ld	r9,PACACONTEXTID(r13)
 	rldimi	r10,r9,USER_ESID_BITS,0
 	b	slb_finish_load
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@ -0,0 +1,633 @@
+/*
+ * address space "slices" (meta-segments) support
+ *
+ * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * Based on hugetlb implementation
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/mman.h>
+#include <asm/mmu.h>
+#include <asm/spu.h>
+
+static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED;
+
+
+#ifdef DEBUG
+int _slice_debug = 1;
+
+static void slice_print_mask(const char *label, struct slice_mask mask)
+{
+	char	*p, buf[16 + 3 + 16 + 1];
+	int	i;
+
+	if (!_slice_debug)
+		return;
+	p = buf;
+	for (i = 0; i < SLICE_NUM_LOW; i++)
+		*(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
+	*(p++) = ' ';
+	*(p++) = '-';
+	*(p++) = ' ';
+	for (i = 0; i < SLICE_NUM_HIGH; i++)
+		*(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+	*(p++) = 0;
+
+	printk(KERN_DEBUG "%s:%s\n", label, buf);
+}
+
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
+
+#else
+
+static void slice_print_mask(const char *label, struct slice_mask mask) {}
+#define slice_dbg(fmt...)
+
+#endif
+
+static struct slice_mask slice_range_to_mask(unsigned long start,
+					     unsigned long len)
+{
+	unsigned long end = start + len - 1;
+	struct slice_mask ret = { 0, 0 };
+
+	if (start < SLICE_LOW_TOP) {
+		unsigned long mend = min(end, SLICE_LOW_TOP);
+		unsigned long mstart = min(start, SLICE_LOW_TOP);
+
+		ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+			- (1u << GET_LOW_SLICE_INDEX(mstart));
+	}
+
+	if ((start + len) > SLICE_LOW_TOP)
+		ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
+			- (1u << GET_HIGH_SLICE_INDEX(start));
+
+	return ret;
+}
+
+static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+			      unsigned long len)
+{
+	struct vm_area_struct *vma;
+
+	if ((mm->task_size - len) < addr)
+		return 0;
+	vma = find_vma(mm, addr);
+	return (!vma || (addr + len) <= vma->vm_start);
+}
+
+static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
+				   1ul << SLICE_LOW_SHIFT);
+}
+
+static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	unsigned long start = slice << SLICE_HIGH_SHIFT;
+	unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+
+	/* Hack, so that each addresses is controlled by exactly one
+	 * of the high or low area bitmaps, the first high area starts
+	 * at 4GB, not 0 */
+	if (start == 0)
+		start = SLICE_LOW_TOP;
+
+	return !slice_area_is_free(mm, start, end - start);
+}
+
+static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+{
+	struct slice_mask ret = { 0, 0 };
+	unsigned long i;
+
+	for (i = 0; i < SLICE_NUM_LOW; i++)
+		if (!slice_low_has_vma(mm, i))
+			ret.low_slices |= 1u << i;
+
+	if (mm->task_size <= SLICE_LOW_TOP)
+		return ret;
+
+	for (i = 0; i < SLICE_NUM_HIGH; i++)
+		if (!slice_high_has_vma(mm, i))
+			ret.high_slices |= 1u << i;
+
+	return ret;
+}
+
+static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+	struct slice_mask ret = { 0, 0 };
+	unsigned long i;
+	u64 psizes;
+
+	psizes = mm->context.low_slices_psize;
+	for (i = 0; i < SLICE_NUM_LOW; i++)
+		if (((psizes >> (i * 4)) & 0xf) == psize)
+			ret.low_slices |= 1u << i;
+
+	psizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++)
+		if (((psizes >> (i * 4)) & 0xf) == psize)
+			ret.high_slices |= 1u << i;
+
+	return ret;
+}
+
+static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
+{
+	return (mask.low_slices & available.low_slices) == mask.low_slices &&
+		(mask.high_slices & available.high_slices) == mask.high_slices;
+}
+
+static void slice_flush_segments(void *parm)
+{
+	struct mm_struct *mm = parm;
+	unsigned long flags;
+
+	if (mm != current->active_mm)
+		return;
+
+	/* update the paca copy of the context struct */
+	get_paca()->context = current->active_mm->context;
+
+	local_irq_save(flags);
+	slb_flush_and_rebolt();
+	local_irq_restore(flags);
+}
+
+static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+{
+	/* Write the new slice psize bits */
+	u64 lpsizes, hpsizes;
+	unsigned long i, flags;
+
+	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
+	slice_print_mask(" mask", mask);
+
+	/* We need to use a spinlock here to protect against
+	 * concurrent 64k -> 4k demotion ...
+	 */
+	spin_lock_irqsave(&slice_convert_lock, flags);
+
+	lpsizes = mm->context.low_slices_psize;
+	for (i = 0; i < SLICE_NUM_LOW; i++)
+		if (mask.low_slices & (1u << i))
+			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+				(((unsigned long)psize) << (i * 4));
+
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++)
+		if (mask.high_slices & (1u << i))
+			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+				(((unsigned long)psize) << (i * 4));
+
+	mm->context.low_slices_psize = lpsizes;
+	mm->context.high_slices_psize = hpsizes;
+
+	slice_dbg(" lsps=%lx, hsps=%lx\n",
+		  mm->context.low_slices_psize,
+		  mm->context.high_slices_psize);
+
+	spin_unlock_irqrestore(&slice_convert_lock, flags);
+	mb();
+
+	/* XXX this is sub-optimal but will do for now */
+	on_each_cpu(slice_flush_segments, mm, 0, 1);
+#ifdef CONFIG_SPU_BASE
+	spu_flush_all_slbs(mm);
+#endif
+}
+
+static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+					      unsigned long len,
+					      struct slice_mask available,
+					      int psize, int use_cache)
+{
+	struct vm_area_struct *vma;
+	unsigned long start_addr, addr;
+	struct slice_mask mask;
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+	if (use_cache) {
+		if (len <= mm->cached_hole_size) {
+			start_addr = addr = TASK_UNMAPPED_BASE;
+			mm->cached_hole_size = 0;
+		} else
+			start_addr = addr = mm->free_area_cache;
+	} else
+		start_addr = addr = TASK_UNMAPPED_BASE;
+
+full_search:
+	for (;;) {
+		addr = _ALIGN_UP(addr, 1ul << pshift);
+		if ((TASK_SIZE - len) < addr)
+			break;
+		vma = find_vma(mm, addr);
+		BUG_ON(vma && (addr >= vma->vm_end));
+
+		mask = slice_range_to_mask(addr, len);
+		if (!slice_check_fit(mask, available)) {
+			if (addr < SLICE_LOW_TOP)
+				addr = _ALIGN_UP(addr + 1,  1ul << SLICE_LOW_SHIFT);
+			else
+				addr = _ALIGN_UP(addr + 1,  1ul << SLICE_HIGH_SHIFT);
+			continue;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Remember the place where we stopped the search:
+			 */
+			if (use_cache)
+				mm->free_area_cache = addr + len;
+			return addr;
+		}
+		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+		        mm->cached_hole_size = vma->vm_start - addr;
+		addr = vma->vm_end;
+	}
+
+	/* Make sure we didn't miss any holes */
+	if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
+		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+		goto full_search;
+	}
+	return -ENOMEM;
+}
+
+static unsigned long slice_find_area_topdown(struct mm_struct *mm,
+					     unsigned long len,
+					     struct slice_mask available,
+					     int psize, int use_cache)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr;
+	struct slice_mask mask;
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+	/* check if free_area_cache is useful for us */
+	if (use_cache) {
+		if (len <= mm->cached_hole_size) {
+			mm->cached_hole_size = 0;
+			mm->free_area_cache = mm->mmap_base;
+		}
+
+		/* either no address requested or can't fit in requested
+		 * address hole
+		 */
+		addr = mm->free_area_cache;
+
+		/* make sure it can fit in the remaining address space */
+		if (addr > len) {
+			addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+			mask = slice_range_to_mask(addr, len);
+			if (slice_check_fit(mask, available) &&
+			    slice_area_is_free(mm, addr, len))
+					/* remember the address as a hint for
+					 * next time
+					 */
+					return (mm->free_area_cache = addr);
+		}
+	}
+
+	addr = mm->mmap_base;
+	while (addr > len) {
+		/* Go down by chunk size */
+		addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+
+		/* Check for hit with different page size */
+		mask = slice_range_to_mask(addr, len);
+		if (!slice_check_fit(mask, available)) {
+			if (addr < SLICE_LOW_TOP)
+				addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
+			else if (addr < (1ul << SLICE_HIGH_SHIFT))
+				addr = SLICE_LOW_TOP;
+			else
+				addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
+			continue;
+		}
+
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * else if new region fits below vma->vm_start,
+		 * return with success:
+		 */
+		vma = find_vma(mm, addr);
+		if (!vma || (addr + len) <= vma->vm_start) {
+			/* remember the address as a hint for next time */
+			if (use_cache)
+				mm->free_area_cache = addr;
+			return addr;
+		}
+
+		/* remember the largest hole we saw so far */
+		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+		        mm->cached_hole_size = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start;
+	}
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	addr = slice_find_area_bottomup(mm, len, available, psize, 0);
+
+	/*
+	 * Restore the topdown base:
+	 */
+	if (use_cache) {
+		mm->free_area_cache = mm->mmap_base;
+		mm->cached_hole_size = ~0UL;
+	}
+
+	return addr;
+}
+
+
+static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
+				     struct slice_mask mask, int psize,
+				     int topdown, int use_cache)
+{
+	if (topdown)
+		return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+	else
+		return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+}
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+				      unsigned long flags, unsigned int psize,
+				      int topdown, int use_cache)
+{
+	struct slice_mask mask;
+	struct slice_mask good_mask;
+	struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
+	int pmask_set = 0;
+	int fixed = (flags & MAP_FIXED);
+	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	struct mm_struct *mm = current->mm;
+
+	/* Sanity checks */
+	BUG_ON(mm->task_size == 0);
+
+	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
+	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
+		  addr, len, flags, topdown, use_cache);
+
+	if (len > mm->task_size)
+		return -ENOMEM;
+	if (fixed && (addr & ((1ul << pshift) - 1)))
+		return -EINVAL;
+	if (fixed && addr > (mm->task_size - len))
+		return -EINVAL;
+
+	/* If hint, make sure it matches our alignment restrictions */
+	if (!fixed && addr) {
+		addr = _ALIGN_UP(addr, 1ul << pshift);
+		slice_dbg(" aligned addr=%lx\n", addr);
+	}
+
+	/* First makeup a "good" mask of slices that have the right size
+	 * already
+	 */
+	good_mask = slice_mask_for_size(mm, psize);
+	slice_print_mask(" good_mask", good_mask);
+
+	/* First check hint if it's valid or if we have MAP_FIXED */
+	if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+
+		/* Don't bother with hint if it overlaps a VMA */
+		if (!fixed && !slice_area_is_free(mm, addr, len))
+			goto search;
+
+		/* Build a mask for the requested range */
+		mask = slice_range_to_mask(addr, len);
+		slice_print_mask(" mask", mask);
+
+		/* Check if we fit in the good mask. If we do, we just return,
+		 * nothing else to do
+		 */
+		if (slice_check_fit(mask, good_mask)) {
+			slice_dbg(" fits good !\n");
+			return addr;
+		}
+
+		/* We don't fit in the good mask, check what other slices are
+		 * empty and thus can be converted
+		 */
+		potential_mask = slice_mask_for_free(mm);
+		potential_mask.low_slices |= good_mask.low_slices;
+		potential_mask.high_slices |= good_mask.high_slices;
+		pmask_set = 1;
+		slice_print_mask(" potential", potential_mask);
+		if (slice_check_fit(mask, potential_mask)) {
+			slice_dbg(" fits potential !\n");
+			goto convert;
+		}
+	}
+
+	/* If we have MAP_FIXED and failed the above step, then error out */
+	if (fixed)
+		return -EBUSY;
+
+ search:
+	slice_dbg(" search...\n");
+
+	/* Now let's see if we can find something in the existing slices
+	 * for that size
+	 */
+	addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
+	if (addr != -ENOMEM) {
+		/* Found within the good mask, we don't have to setup,
+		 * we thus return directly
+		 */
+		slice_dbg(" found area at 0x%lx\n", addr);
+		return addr;
+	}
+
+	/* Won't fit, check what can be converted */
+	if (!pmask_set) {
+		potential_mask = slice_mask_for_free(mm);
+		potential_mask.low_slices |= good_mask.low_slices;
+		potential_mask.high_slices |= good_mask.high_slices;
+		pmask_set = 1;
+		slice_print_mask(" potential", potential_mask);
+	}
+
+	/* Now let's see if we can find something in the existing slices
+	 * for that size
+	 */
+	addr = slice_find_area(mm, len, potential_mask, psize, topdown,
+			       use_cache);
+	if (addr == -ENOMEM)
+		return -ENOMEM;
+
+	mask = slice_range_to_mask(addr, len);
+	slice_dbg(" found potential area at 0x%lx\n", addr);
+	slice_print_mask(" mask", mask);
+
+ convert:
+	slice_convert(mm, mask, psize);
+	return addr;
+
+}
+EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
+
+unsigned long arch_get_unmapped_area(struct file *filp,
+				     unsigned long addr,
+				     unsigned long len,
+				     unsigned long pgoff,
+				     unsigned long flags)
+{
+	return slice_get_unmapped_area(addr, len, flags,
+				       current->mm->context.user_psize,
+				       0, 1);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+					     const unsigned long addr0,
+					     const unsigned long len,
+					     const unsigned long pgoff,
+					     const unsigned long flags)
+{
+	return slice_get_unmapped_area(addr0, len, flags,
+				       current->mm->context.user_psize,
+				       1, 1);
+}
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+	u64 psizes;
+	int index;
+
+	if (addr < SLICE_LOW_TOP) {
+		psizes = mm->context.low_slices_psize;
+		index = GET_LOW_SLICE_INDEX(addr);
+	} else {
+		psizes = mm->context.high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
+	}
+
+	return (psizes >> (index * 4)) & 0xf;
+}
+EXPORT_SYMBOL_GPL(get_slice_psize);
+
+/*
+ * This is called by hash_page when it needs to do a lazy conversion of
+ * an address space from real 64K pages to combo 4K pages (typically
+ * when hitting a non cacheable mapping on a processor or hypervisor
+ * that won't allow them for 64K pages).
+ *
+ * This is also called in init_new_context() to change back the user
+ * psize from whatever the parent context had it set to
+ *
+ * This function will only change the content of the {low,high)_slice_psize
+ * masks, it will not flush SLBs as this shall be handled lazily by the
+ * caller.
+ */
+void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+{
+	unsigned long flags, lpsizes, hpsizes;
+	unsigned int old_psize;
+	int i;
+
+	slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+
+	spin_lock_irqsave(&slice_convert_lock, flags);
+
+	old_psize = mm->context.user_psize;
+	slice_dbg(" old_psize=%d\n", old_psize);
+	if (old_psize == psize)
+		goto bail;
+
+	mm->context.user_psize = psize;
+	wmb();
+
+	lpsizes = mm->context.low_slices_psize;
+	for (i = 0; i < SLICE_NUM_LOW; i++)
+		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
+			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+				(((unsigned long)psize) << (i * 4));
+
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++)
+		if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
+			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+				(((unsigned long)psize) << (i * 4));
+
+	mm->context.low_slices_psize = lpsizes;
+	mm->context.high_slices_psize = hpsizes;
+
+	slice_dbg(" lsps=%lx, hsps=%lx\n",
+		  mm->context.low_slices_psize,
+		  mm->context.high_slices_psize);
+
+ bail:
+	spin_unlock_irqrestore(&slice_convert_lock, flags);
+}
+
+/*
+ * is_hugepage_only_range() is used by generic code to verify wether
+ * a normal mmap mapping (non hugetlbfs) is valid on a given area.
+ *
+ * until the generic code provides a more generic hook and/or starts
+ * calling arch get_unmapped_area for MAP_FIXED (which our implementation
+ * here knows how to deal with), we hijack it to keep standard mappings
+ * away from us.
+ *
+ * because of that generic code limitation, MAP_FIXED mapping cannot
+ * "convert" back a slice with no VMAs to the standard page size, only
+ * get_unmapped_area() can. It would be possible to fix it here but I
+ * prefer working on fixing the generic code instead.
+ *
+ * WARNING: This will not work if hugetlbfs isn't enabled since the
+ * generic code will redefine that function as 0 in that. This is ok
+ * for now as we only use slices with hugetlbfs enabled. This should
+ * be fixed as the generic code gets fixed.
+ */
+int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+			   unsigned long len)
+{
+	struct slice_mask mask, available;
+
+	mask = slice_range_to_mask(addr, len);
+	available = slice_mask_for_size(mm, mm->context.user_psize);
+
+#if 0 /* too verbose */
+	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
+		 mm, addr, len);
+	slice_print_mask(" mask", mask);
+	slice_print_mask(" available", available);
+#endif
+	return !slice_check_fit(mask, available);
+}
+
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
 	if (start >= end)
 		return;
 	end = (end - 1) | ~PAGE_MASK;
-	pmd = pmd_offset(pgd_offset(mm, start), start);
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
 	for (;;) {
 		pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
 		if (pmd_end > end)
@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 		return;
 	}
 	mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
-	pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
 	if (!pmd_none(*pmd))
 		flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
 	FINISH_FLUSH;
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 */
 	addr &= PAGE_MASK;

-	/* Get page size (maybe move back to caller) */
+	/* Get page size (maybe move back to caller).
+	 *
+	 * NOTE: when using special 64K mappings in 4K environment like
+	 * for SPEs, we obtain the page size from the slice, which thus
+	 * must still exist (and thus the VMA not reused) at the time
+	 * of this call
+	 */
 	if (huge) {
 #ifdef CONFIG_HUGETLB_PAGE
 		psize = mmu_huge_psize;
 #else
 		BUG();
-		psize = pte_pagesize_index(pte); /* shutup gcc */
+		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
 #endif
 	} else
-		psize = pte_pagesize_index(pte);
+		psize = pte_pagesize_index(mm, addr, pte);

 	/* Build full vaddr */
 	if (!is_kernel_addr(addr)) {
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@ -15,8 +15,8 @@
 #include <linux/init.h>
 #include <linux/delay.h>

-#include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #include <asm/pci-bridge.h>
 #include <asm-powerpc/mpic.h>
 #include <asm/mpc86xx.h>
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@ -35,6 +35,21 @@ config SPU_FS
 	  Units on machines implementing the Broadband Processor
 	  Architecture.

+config SPU_FS_64K_LS
+	bool "Use 64K pages to map SPE local  store"
+	# we depend on PPC_MM_SLICES for now rather than selecting
+	# it because we depend on hugetlbfs hooks being present. We
+	# will fix that when the generic code has been improved to
+	# not require hijacking hugetlbfs hooks.
+	depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+	default y
+	select PPC_HAS_HASH_64K
+	help
+	  This option causes SPE local stores to be mapped in process
+	  address spaces using 64K pages while the rest of the kernel
+	  uses 4K pages. This can improve performances of applications
+	  using multiple SPEs by lowering the TLB pressure on them.
+
 config SPU_BASE
 	bool
 	default n
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@ -144,12 +144,11 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)

 	switch(REGION_ID(ea)) {
 	case USER_REGION_ID:
-#ifdef CONFIG_HUGETLB_PAGE
-		if (in_hugepage_area(mm->context, ea))
-			psize = mmu_huge_psize;
-		else
+#ifdef CONFIG_PPC_MM_SLICES
+		psize = get_slice_psize(mm, ea);
+#else
+		psize = mm->context.user_psize;
 #endif
-			psize = mm->context.user_psize;
 		vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
 				SLB_VSID_USER;
 		break;
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@ -1,4 +1,4 @@
-obj-y += switch.o fault.o
+obj-y += switch.o fault.o lscsa_alloc.o

 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@ -36,10 +36,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
 	/* Binding to physical processor deferred
 	 * until spu_activate().
 	 */
-	spu_init_csa(&ctx->csa);
-	if (!ctx->csa.lscsa) {
+	if (spu_init_csa(&ctx->csa))
 		goto out_free;
-	}
 	spin_lock_init(&ctx->mmio_lock);
 	spin_lock_init(&ctx->mapping_lock);
 	kref_init(&ctx->kref);
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@ -118,14 +118,32 @@ spufs_mem_write(struct file *file, const char __user *buffer,
 static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
 					  unsigned long address)
 {
-	struct spu_context *ctx = vma->vm_file->private_data;
-	unsigned long pfn, offset = address - vma->vm_start;
+	struct spu_context *ctx	= vma->vm_file->private_data;
+	unsigned long pfn, offset, addr0 = address;
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_state *csa = &ctx->csa;
+	int psize;

-	offset += vma->vm_pgoff << PAGE_SHIFT;
+	/* Check what page size we are using */
+	psize = get_slice_psize(vma->vm_mm, address);

+	/* Some sanity checking */
+	BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+	/* Wow, 64K, cool, we need to align the address though */
+	if (csa->use_big_pages) {
+		BUG_ON(vma->vm_start & 0xffff);
+		address &= ~0xfffful;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+	offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
 	if (offset >= LS_SIZE)
 		return NOPFN_SIGBUS;

+	pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n",
+		 addr0, address, offset);
+
 	spu_acquire(ctx);

 	if (ctx->state == SPU_STATE_SAVED) {
@ -149,9 +167,24 @@ static struct vm_operations_struct spufs_mem_mmap_vmops = {
 	.nopfn = spufs_mem_mmap_nopfn,
 };

-static int
-spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
 {
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_context	*ctx = file->private_data;
+	struct spu_state	*csa = &ctx->csa;
+
+	/* Sanity check VMA alignment */
+	if (csa->use_big_pages) {
+		pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+			 " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+			 vma->vm_pgoff);
+		if (vma->vm_start & 0xffff)
+			return -EINVAL;
+		if (vma->vm_pgoff & 0xf)
+			return -EINVAL;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;

@ -163,13 +196,34 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }

+#ifdef CONFIG_SPU_FS_64K_LS
+unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr,
+				      unsigned long len, unsigned long pgoff,
+				      unsigned long flags)
+{
+	struct spu_context	*ctx = file->private_data;
+	struct spu_state	*csa = &ctx->csa;
+
+	/* If not using big pages, fallback to normal MM g_u_a */
+	if (!csa->use_big_pages)
+		return current->mm->get_unmapped_area(file, addr, len,
+						      pgoff, flags);
+
+	/* Else, try to obtain a 64K pages slice */
+	return slice_get_unmapped_area(addr, len, flags,
+				       MMU_PAGE_64K, 1, 0);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
 static const struct file_operations spufs_mem_fops = {
-	.open	 = spufs_mem_open,
-	.release = spufs_mem_release,
-	.read    = spufs_mem_read,
-	.write   = spufs_mem_write,
-	.llseek  = generic_file_llseek,
-	.mmap    = spufs_mem_mmap,
+	.open	 		= spufs_mem_open,
+	.read   		= spufs_mem_read,
+	.write   		= spufs_mem_write,
+	.llseek  		= generic_file_llseek,
+	.mmap    		= spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+	.get_unmapped_area	= spufs_get_unmapped_area,
+#endif
 };

 static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
--- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@ -0,0 +1,181 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa;
+	unsigned char *p;
+
+	lscsa = vmalloc(sizeof(struct spu_lscsa));
+	if (!lscsa)
+		return -ENOMEM;
+	memset(lscsa, 0, sizeof(struct spu_lscsa));
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	return 0;
+}
+
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+	/* Clear reserved bit before vfree. */
+	unsigned char *p;
+
+	if (csa->lscsa == NULL)
+		return;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(csa->lscsa);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+#define SPU_64K_PAGE_SHIFT	16
+#define SPU_64K_PAGE_ORDER	(SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+#define SPU_64K_PAGE_COUNT	(1ul << SPU_64K_PAGE_ORDER)
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct page	**pgarray;
+	unsigned char	*p;
+	int		i, j, n_4k;
+
+	/* Check availability of 64K pages */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift == 0)
+		goto fail;
+
+	csa->use_big_pages = 1;
+
+	pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+		 csa);
+
+	/* First try to allocate our 64K pages. We need 5 of them
+	 * with the current implementation. In the future, we should try
+	 * to separate the lscsa with the actual local store image, thus
+	 * allowing us to require only 4 64K pages per context
+	 */
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+		/* XXX This is likely to fail, we should use a special pool
+		 *     similiar to what hugetlbfs does.
+		 */
+		csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+						  SPU_64K_PAGE_ORDER);
+		if (csa->lscsa_pages[i] == NULL)
+			goto fail;
+	}
+
+	pr_debug(" success ! creating vmap...\n");
+
+	/* Now we need to create a vmalloc mapping of these for the kernel
+	 * and SPU context switch code to use. Currently, we stick to a
+	 * normal kernel vmalloc mapping, which in our case will be 4K
+	 */
+	n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+	pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+	if (pgarray == NULL)
+		goto fail;
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+			/* We assume all the struct page's are contiguous
+			 * which should be hopefully the case for an order 4
+			 * allocation..
+			 */
+			pgarray[i * SPU_64K_PAGE_COUNT + j] =
+				csa->lscsa_pages[i] + j;
+	csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+	kfree(pgarray);
+	if (csa->lscsa == NULL)
+		goto fail;
+
+	memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+	/* Set LS pages reserved to allow for user-space mapping.
+	 *
+	 * XXX isn't that a bit obsolete ? I think we should just
+	 * make sure the page count is high enough. Anyway, won't harm
+	 * for now
+	 */
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	pr_debug(" all good !\n");
+
+	return 0;
+fail:
+	pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+	spu_free_lscsa(csa);
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	unsigned char *p;
+	int i;
+
+	if (!csa->use_big_pages) {
+		spu_free_lscsa_std(csa);
+		return;
+	}
+	csa->use_big_pages = 0;
+
+	if (csa->lscsa == NULL)
+		goto free_pages;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vunmap(csa->lscsa);
+	csa->lscsa = NULL;
+
+ free_pages:
+
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		if (csa->lscsa_pages[i])
+			__free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@ -2188,40 +2188,30 @@ static void init_priv2(struct spu_state *csa)
 * as it is by far the largest of the context save regions,
 * and may need to be pinned or otherwise specially aligned.
 */
-void spu_init_csa(struct spu_state *csa)
+int spu_init_csa(struct spu_state *csa)
 {
-	struct spu_lscsa *lscsa;
-	unsigned char *p;
+	int rc;

 	if (!csa)
-		return;
+		return -EINVAL;
 	memset(csa, 0, sizeof(struct spu_state));

-	lscsa = vmalloc(sizeof(struct spu_lscsa));
-	if (!lscsa)
-		return;
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;

-	memset(lscsa, 0, sizeof(struct spu_lscsa));
-	csa->lscsa = lscsa;
 	spin_lock_init(&csa->register_lock);

-	/* Set LS pages reserved to allow for user-space mapping. */
-	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		SetPageReserved(vmalloc_to_page(p));
-
 	init_prob(csa);
 	init_priv1(csa);
 	init_priv2(csa);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(spu_init_csa);

 void spu_fini_csa(struct spu_state *csa)
 {
-	/* Clear reserved bit before vfree. */
-	unsigned char *p;
-	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		ClearPageReserved(vmalloc_to_page(p));
-
-	vfree(csa->lscsa);
+	spu_free_lscsa(csa);
 }
 EXPORT_SYMBOL_GPL(spu_fini_csa);
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@ -7,7 +7,9 @@ menu "iSeries device drivers"
 	depends on PPC_ISERIES

 config VIOCONS
-	tristate "iSeries Virtual Console Support (Obsolete)"
+	bool "iSeries Virtual Console Support (Obsolete)"
+	depends on !HVC_ISERIES
+	default n
 	help
 	  This is the old virtual console driver for legacy iSeries.
 	  You should use the iSeries Hypervisor Virtual Console
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@ -100,6 +100,9 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
 static DEFINE_SPINLOCK(slot_errbuf_lock);
 static int eeh_error_buf_size;

+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
 /* System monitoring statistics */
 static unsigned long no_device;
 static unsigned long no_dn;
@ -115,7 +118,8 @@ static unsigned long slot_resets;
 /* --------------------------------------------------------------- */
 /* Below lies the EEH event infrastructure */

-void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
+static void rtas_slot_error_detail(struct pci_dn *pdn, int severity,
+                                   char *driver_log, size_t loglen)
 {
 	int config_addr;
 	unsigned long flags;
@ -133,7 +137,8 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
 	rc = rtas_call(ibm_slot_error_detail,
 	               8, 1, NULL, config_addr,
 	               BUID_HI(pdn->phb->buid),
-	               BUID_LO(pdn->phb->buid), NULL, 0,
+	               BUID_LO(pdn->phb->buid),
+	               virt_to_phys(driver_log), loglen,
 	               virt_to_phys(slot_errbuf),
 	               eeh_error_buf_size,
 	               severity);
@ -143,6 +148,84 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity)
 	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
 }

+/**
+ * gather_pci_data - copy assorted PCI config space registers to buff
+ * @pdn: device to report data for
+ * @buf: point to buffer in which to log
+ * @len: amount of room in buffer
+ *
+ * This routine captures assorted PCI configuration space data,
+ * and puts them into a buffer for RTAS error logging.
+ */
+static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
+{
+	u32 cfg;
+	int cap, i;
+	int n = 0;
+
+	n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
+	printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
+
+	rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
+
+	rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
+	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
+
+	/* Dump out the PCI-X command and status regs */
+	cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_PCIX);
+	if (cap) {
+		rtas_read_config(pdn, cap, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
+
+		rtas_read_config(pdn, cap+4, 4, &cfg);
+		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
+	}
+
+	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
+	cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_EXP);
+	if (cap) {
+		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+		printk(KERN_WARNING
+		       "EEH: PCI-E capabilities and status follow:\n");
+
+		for (i=0; i<=8; i++) {
+			rtas_read_config(pdn, cap+4*i, 4, &cfg);
+			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
+		}
+
+		cap = pci_find_ext_capability(pdn->pcidev,PCI_EXT_CAP_ID_ERR);
+		if (cap) {
+			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+			printk(KERN_WARNING
+			       "EEH: PCI-E AER capability register set follows:\n");
+
+			for (i=0; i<14; i++) {
+				rtas_read_config(pdn, cap+4*i, 4, &cfg);
+				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
+			}
+		}
+	}
+	return n;
+}
+
+void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
+{
+	size_t loglen = 0;
+	memset(pci_regs_buf, 0, EEH_PCI_REGS_LOG_LEN);
+
+	rtas_pci_enable(pdn, EEH_THAW_MMIO);
+	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+
+	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
+}
+
 /**
 * read_slot_reset_state - Read the reset state of a device node's slot
 * @dn: device node to read
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@ -361,11 +361,12 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 		goto hard_fail;
 	}

-	eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
 	printk(KERN_WARNING
-	   "EEH: This PCI device has failed %d times since last reboot: "
-		"location=%s driver=%s pci addr=%s\n",
-		frozen_pdn->eeh_freeze_count, location, drv_str, pci_str);
+	   "EEH: This PCI device has failed %d times in the last hour:\n",
+		frozen_pdn->eeh_freeze_count);
+	printk(KERN_WARNING
+		"EEH: location=%s driver=%s pci addr=%s\n",
+		location, drv_str, pci_str);

 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
@ -375,6 +376,11 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	 */
 	pci_walk_bus(frozen_bus, eeh_report_error, &result);

+	/* Since rtas may enable MMIO when posting the error log,
+	 * don't post the error log until after all dev drivers
+	 * have been informed. */
+	eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
+
 	/* If all device drivers were EEH-unaware, then shut
 	 * down all of the device drivers, and hope they
 	 * go down willingly, without panicing the system.
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@ -907,7 +907,7 @@ static int __init fs_enet_of_init(void)
 		struct fs_platform_info fs_enet_data;
 		const unsigned int *id;
 		const unsigned int *phy_addr;
-		void *mac_addr;
+		const void *mac_addr;
 		const phandle *ph;
 		const char *model;

--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@ -631,7 +631,8 @@ config HVC_CONSOLE

 config HVC_ISERIES
 	bool "iSeries Hypervisor Virtual Console support"
-	depends on PPC_ISERIES && !VIOCONS
+	depends on PPC_ISERIES
+	default y
 	select HVC_DRIVER
 	help
 	  iSeries machines support a hypervisor virtual console.
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@ -350,10 +350,13 @@ typedef unsigned long mm_context_id_t;

 typedef struct {
 	mm_context_id_t id;
-	u16 user_psize;			/* page size index */
-	u16 sllp;			/* SLB entry page size encoding */
-#ifdef CONFIG_HUGETLB_PAGE
-	u16 low_htlb_areas, high_htlb_areas;
+	u16 user_psize;		/* page size index */
+
+#ifdef CONFIG_PPC_MM_SLICES
+	u64 low_slices_psize;	/* SLB page size encodings */
+	u64 high_slices_psize;  /* 4 bits per slice for now */
+#else
+	u16 sllp;		/* SLB page size encoding */
 #endif
 	unsigned long vdso_base;
 } mm_context_t;
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@ -83,8 +83,8 @@ struct paca_struct {

 	mm_context_t context;
 	u16 vmalloc_sllp;
-	u16 slb_cache[SLB_CACHE_ENTRIES];
 	u16 slb_cache_ptr;
+	u16 slb_cache[SLB_CACHE_ENTRIES];

 	/*
 	 * then miscellaneous read-write fields
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@ -88,58 +88,56 @@ extern unsigned int HPAGE_SHIFT;

 #endif /* __ASSEMBLY__ */

-#ifdef CONFIG_HUGETLB_PAGE
+#ifdef CONFIG_PPC_MM_SLICES

-#define HTLB_AREA_SHIFT		40
-#define HTLB_AREA_SIZE		(1UL << HTLB_AREA_SHIFT)
-#define GET_HTLB_AREA(x)	((x) >> HTLB_AREA_SHIFT)
+#define SLICE_LOW_SHIFT		28
+#define SLICE_HIGH_SHIFT	40

-#define LOW_ESID_MASK(addr, len)    \
-	(((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \
-	  - (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff)
-#define HTLB_AREA_MASK(addr, len)   (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
-		                      - (1U << GET_HTLB_AREA(addr))) & 0xffff)
+#define SLICE_LOW_TOP		(0x100000000ul)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define SLICE_NUM_HIGH		(PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+struct slice_mask {
+	u16 low_slices;
+	u16 high_slices;
+};
+
+struct mm_struct;
+
+extern unsigned long slice_get_unmapped_area(unsigned long addr,
+					     unsigned long len,
+					     unsigned long flags,
+					     unsigned int psize,
+					     int topdown,
+					     int use_cache);
+
+extern unsigned int get_slice_psize(struct mm_struct *mm,
+				    unsigned long addr);
+
+extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
+extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);

 #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+extern int is_hugepage_only_range(struct mm_struct *m,
+				  unsigned long addr,
+				  unsigned long len);
+
+#endif /* __ASSEMBLY__ */
+#else
+#define slice_init()
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_HUGETLB_PAGE
+
 #define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
-#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
 #define ARCH_HAS_SETCLEAR_HUGE_PTE
-
-#define touches_hugepage_low_range(mm, addr, len) \
-	(((addr) < 0x100000000UL) \
-	 && (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas))
-#define touches_hugepage_high_range(mm, addr, len) \
-	((((addr) + (len)) > 0x100000000UL) \
-	  && (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas))
-
-#define __within_hugepage_low_range(addr, len, segmask) \
-	( (((addr)+(len)) <= 0x100000000UL) \
-	  && ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)))
-#define within_hugepage_low_range(addr, len) \
-	__within_hugepage_low_range((addr), (len), \
-				    current->mm->context.low_htlb_areas)
-#define __within_hugepage_high_range(addr, len, zonemask) \
-	( ((addr) >= 0x100000000UL) \
-	  && ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)))
-#define within_hugepage_high_range(addr, len) \
-	__within_hugepage_high_range((addr), (len), \
-				    current->mm->context.high_htlb_areas)
-
-#define is_hugepage_only_range(mm, addr, len) \
-	(touches_hugepage_high_range((mm), (addr), (len)) || \
-	  touches_hugepage_low_range((mm), (addr), (len)))
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA

-#define in_hugepage_area(context, addr) \
-	(cpu_has_feature(CPU_FTR_16M_PAGE) && \
-	 ( ( (addr) >= 0x100000000UL) \
-	   ? ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) \
-	   : ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) )
-
-#else /* !CONFIG_HUGETLB_PAGE */
-
-#define in_hugepage_area(mm, addr)	0
-
 #endif /* !CONFIG_HUGETLB_PAGE */

 #ifdef MODULE
--- a/include/asm-powerpc/pgalloc-64.h
+++ b/include/asm-powerpc/pgalloc-64.h
@ -14,18 +14,11 @@

 extern struct kmem_cache *pgtable_cache[];

-#ifdef CONFIG_PPC_64K_PAGES
-#define PTE_CACHE_NUM	0
-#define PMD_CACHE_NUM	1
-#define PGD_CACHE_NUM	2
-#define HUGEPTE_CACHE_NUM 3
-#else
-#define PTE_CACHE_NUM	0
-#define PMD_CACHE_NUM	1
-#define PUD_CACHE_NUM	1
-#define PGD_CACHE_NUM	0
-#define HUGEPTE_CACHE_NUM 2
-#endif
+#define PGD_CACHE_NUM		0
+#define PUD_CACHE_NUM		1
+#define PMD_CACHE_NUM		1
+#define HUGEPTE_CACHE_NUM	2
+#define PTE_NONCACHE_NUM	3  /* from GFP rather than kmem_cache */

 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@ -91,8 +84,7 @@ static inline void pmd_free(pmd_t *pmd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
-				GFP_KERNEL|__GFP_REPEAT);
+        return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
 }

 static inline struct page *pte_alloc_one(struct mm_struct *mm,
@ -103,12 +95,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,

 static inline void pte_free_kernel(pte_t *pte)
 {
-	kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
+	free_page((unsigned long)pte);
 }

 static inline void pte_free(struct page *ptepage)
 {
-	pte_free_kernel(page_address(ptepage));
+	__free_page(ptepage);
 }

 #define PGF_CACHENUM_MASK	0x3
@ -130,14 +122,17 @@ static inline void pgtable_free(pgtable_free_t pgf)
 	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
 	int cachenum = pgf.val & PGF_CACHENUM_MASK;

-	kmem_cache_free(pgtable_cache[cachenum], p);
+	if (cachenum == PTE_NONCACHE_NUM)
+		free_page((unsigned long)p);
+	else
+		kmem_cache_free(pgtable_cache[cachenum], p);
 }

 extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);

 #define __pte_free_tlb(tlb, ptepage)	\
 	pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
-		PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+		PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1))
 #define __pmd_free_tlb(tlb, pmd) 	\
 	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
 		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
--- a/include/asm-powerpc/pgtable-4k.h
+++ b/include/asm-powerpc/pgtable-4k.h
@ -80,7 +80,11 @@

 #define pte_iterate_hashed_end() } while(0)

-#define pte_pagesize_index(pte)	MMU_PAGE_4K
+#ifdef CONFIG_PPC_HAS_HASH_64K
+#define pte_pagesize_index(mm, addr, pte)	get_slice_psize(mm, addr)
+#else
+#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
+#endif

 /*
 * 4-level page tables related bits
--- a/include/asm-powerpc/pgtable-64k.h
+++ b/include/asm-powerpc/pgtable-64k.h
@ -35,6 +35,11 @@
 #define _PAGE_HPTE_SUB0	0x08000000 /* combo only: first sub page */
 #define _PAGE_COMBO	0x10000000 /* this is a combo 4k page */
 #define _PAGE_4K_PFN	0x20000000 /* PFN is for a single 4k page */
+
+/* Note the full page bits must be in the same location as for normal
+ * 4k pages as the same asssembly will be used to insert 64K pages
+ * wether the kernel has CONFIG_PPC_64K_PAGES or not
+ */
 #define _PAGE_F_SECOND  0x00008000 /* full page: hidx bits */
 #define _PAGE_F_GIX     0x00007000 /* full page: hidx bits */

@ -88,7 +93,7 @@

 #define pte_iterate_hashed_end() } while(0); } } while(0)

-#define pte_pagesize_index(pte)	\
+#define pte_pagesize_index(mm, addr, pte)	\
 	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)

 #define remap_4k_pfn(vma, addr, pfn, prot)				\
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@ -235,6 +235,12 @@ struct spu_priv2_collapsed {
 */
 struct spu_state {
 	struct spu_lscsa *lscsa;
+#ifdef CONFIG_SPU_FS_64K_LS
+	int		use_big_pages;
+	/* One struct page per 64k page */
+#define SPU_LSCSA_NUM_BIG_PAGES	(sizeof(struct spu_lscsa) / 0x10000)
+	struct page	*lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES];
+#endif
 	struct spu_problem_collapsed prob;
 	struct spu_priv1_collapsed priv1;
 	struct spu_priv2_collapsed priv2;
@ -247,12 +253,14 @@ struct spu_state {
 	spinlock_t register_lock;
 };

-extern void spu_init_csa(struct spu_state *csa);
+extern int spu_init_csa(struct spu_state *csa);
 extern void spu_fini_csa(struct spu_state *csa);
 extern int spu_save(struct spu_state *prev, struct spu *spu);
 extern int spu_restore(struct spu_state *new, struct spu *spu);
 extern int spu_switch(struct spu_state *prev, struct spu_state *new,
 		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);

 #endif /* !__SPU__ */
 #endif /* __KERNEL__ */
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@ -52,7 +52,15 @@ struct hibernation_ops {

 #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
 /* kernel/power/snapshot.c */
-extern void __init register_nosave_region(unsigned long, unsigned long);
+extern void __register_nosave_region(unsigned long b, unsigned long e, int km);
+static inline void register_nosave_region(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 0);
+}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e)
+{
+	__register_nosave_region(b, e, 1);
+}
 extern int swsusp_page_is_forbidden(struct page *);
 extern void swsusp_set_page_free(struct page *);
 extern void swsusp_unset_page_free(struct page *);
@ -62,6 +70,7 @@ extern void hibernation_set_ops(struct hibernation_ops *ops);
 extern int hibernate(void);
 #else
 static inline void register_nosave_region(unsigned long b, unsigned long e) {}
+static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
 static inline void swsusp_unset_page_free(struct page *p) {}
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@ -607,7 +607,8 @@ static LIST_HEAD(nosave_regions);
 */

 void __init
-register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
+__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
+			 int use_kmalloc)
 {
 	struct nosave_region *region;

@ -623,8 +624,13 @@ register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
 			goto Report;
 		}
 	}
-	/* This allocation cannot fail */
-	region = alloc_bootmem_low(sizeof(struct nosave_region));
+	if (use_kmalloc) {
+		/* during init, this shouldn't fail */
+		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+		BUG_ON(!region);
+	} else
+		/* This allocation cannot fail */
+		region = alloc_bootmem_low(sizeof(struct nosave_region));
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);