linux-sg2042/arch/x86/include/asm/pmem.h

/*
 * Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#ifndef __ASM_X86_PMEM_H__
#define __ASM_X86_PMEM_H__

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>

#define ARCH_MEMREMAP_PMEM MEMREMAP_WB

#ifdef CONFIG_ARCH_HAS_PMEM_API
/**
 * arch_memcpy_to_pmem - copy a kernel buffer into persistent memory
 * @dst: persistent memory destination of the copy
 * @src: kernel buffer supplying the data
 * @n: number of bytes to copy
 *
 * The data is moved with __copy_from_user_inatomic_nocache() so that it
 * reaches the persistent memory media through non-temporal stores.  The
 * copy is not durable on its own: a later arch_wmb_pmem() is what
 * flushes cpu and memory controller write buffers.
 *
 * A short copy is treated as fatal: the function warns and then BUG()s.
 */
static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
		size_t n)
{
	/*
	 * Both buffers live in the kernel, so a non-zero return (page
	 * fault) from __copy_from_user_inatomic_nocache() is not expected;
	 * a general protection fault would already have been reported
	 * before we get to the WARN+BUG below.
	 */
	int remaining = __copy_from_user_inatomic_nocache((void __force *) dst,
			(void __user *) src, n);

	/* Common case: every byte was written, nothing more to do. */
	if (!remaining)
		return;

	WARN(1, "%s: fault copying %p <- %p unwritten: %d\n",
			__func__, dst, src, remaining);
	BUG();
}

/**
 * arch_wmb_pmem - synchronize writes to persistent memory
 *
 * After a series of arch_memcpy_to_pmem() operations this drains data
 * from cpu write buffers and any platform (memory controller) buffers
 * to ensure that written data is durable on persistent memory media.
 *
 * The other writers in this header (arch_copy_from_iter_pmem() and
 * arch_clear_pmem()) are likewise documented as requiring an explicit
 * arch_wmb_pmem() call for ordering.
 */
static inline void arch_wmb_pmem(void)
{
	/*
	 * wmb() to 'sfence' all previous writes such that they are
	 * architecturally visible to 'pcommit'.  Note, that we've
	 * already arranged for pmem writes to avoid the cache via
	 * arch_memcpy_to_pmem(), or to be written back from the cache via
	 * __arch_wb_cache_pmem() for the cached-store paths.
	 *
	 * The order of the two calls below matters: the fence must come
	 * before the commit.
	 */
	wmb();
	/*
	 * NOTE(review): pcommit_sfence() is defined outside this header;
	 * presumably it issues 'pcommit' followed by another 'sfence' --
	 * confirm against asm/special_insns.h.
	 */
	pcommit_sfence();
}

/**
 * __arch_wb_cache_pmem - write back a cache range with CLWB
 * @vaddr:	virtual start address
 * @size:	number of bytes to write back
 *
 * Issue one CLWB (cache line write back) per cache line, starting at the
 * line containing @vaddr and continuing while the line start is below
 * @vaddr + @size.  The line size is taken from
 * boot_cpu_data.x86_clflush_size.  No barrier is issued here, so callers
 * must order the write-back with an arch_wmb_pmem() call.  This helper is
 * internal to the x86 PMEM implementation.
 */
static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
{
	u16 line_size = boot_cpu_data.x86_clflush_size;
	unsigned long line_mask = line_size - 1;
	void *end = vaddr + size;
	/* Round the start down to the beginning of its cache line. */
	void *line = (void *)((unsigned long)vaddr & ~line_mask);

	while (line < end) {
		clwb(line);
		line += line_size;
	}
}

/*
 * Decide whether a copy from iterator 'i' must be followed by a cache
 * write-back.  copy_from_iter_nocache() on x86 only uses non-temporal
 * stores for iovec iterators, so every other iterator type (bvec & kvec)
 * leaves data in the cache and needs the write-back.
 */
static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
{
	return !iter_is_iovec(i);
}

/**
 * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
 * @addr:	PMEM destination address
 * @bytes:	number of bytes to copy
 * @i:		iterator with source data
 *
 * Copy up to @bytes bytes from the iterator @i into the PMEM buffer at
 * @addr and return the count reported by copy_from_iter_nocache().  When
 * the iterator type requires it, the full @bytes range at @addr is then
 * written back from the cache.  No barrier is issued here, so callers
 * must order the copy with an arch_wmb_pmem() call.
 */
static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
		struct iov_iter *i)
{
	void *dst = (void __force *)addr;
	/* TODO: skip the write-back by always using non-temporal stores */
	size_t copied = copy_from_iter_nocache(dst, bytes, i);

	/* iovec iterators already bypassed the cache; nothing to write back. */
	if (!__iter_needs_pmem_wb(i))
		return copied;

	__arch_wb_cache_pmem(dst, bytes);
	return copied;
}

/**
 * arch_clear_pmem - zero a PMEM memory range
 * @addr:	virtual start address
 * @size:	number of bytes to zero
 *
 * Fill @size bytes starting at @addr with zeros, then write the range
 * back from the cache.  A request for exactly one page-aligned page goes
 * through clear_page(); anything else goes through memset().  No barrier
 * is issued here, so callers must order the clear with an
 * arch_wmb_pmem() call.
 */
static inline void arch_clear_pmem(void __pmem *addr, size_t size)
{
	void *dst = (void __force *)addr;
	/* True only for a single page that starts on a page boundary. */
	bool whole_page = size == PAGE_SIZE &&
			  !((unsigned long)dst & ~PAGE_MASK);

	/* TODO: implement the zeroing via non-temporal writes */
	if (whole_page)
		clear_page(dst);
	else
		memset(dst, 0, size);

	__arch_wb_cache_pmem(dst, size);
}

/**
 * arch_has_wmb_pmem - report whether arch_wmb_pmem() can be relied upon
 *
 * Returns false on builds without CONFIG_X86_64.  On 64-bit builds,
 * returns whether the cpu advertises X86_FEATURE_PCOMMIT.
 */
static inline bool arch_has_wmb_pmem(void)
{
#ifndef CONFIG_X86_64
	/*
	 * wmb() must be an 'sfence' for arch_wmb_pmem() to work, and that
	 * is only guaranteed on 64-bit builds.
	 */
	return false;
#else
	return static_cpu_has(X86_FEATURE_PCOMMIT);
#endif
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */

#endif /* __ASM_X86_PMEM_H__ */
pmem, x86: move x86 PMEM API to new pmem.h header Move the x86 PMEM API implementation out of asm/cacheflush.h and into its own header asm/pmem.h. This will allow members of the PMEM API to be more easily identified on this and other architectures. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Suggested-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:36 +08:00			`/*`
			`* Copyright(c) 2015 Intel Corporation. All rights reserved.`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of version 2 of the GNU General Public License as`
			`* published by the Free Software Foundation.`
			`*`
			`* This program is distributed in the hope that it will be useful, but`
			`* WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* General Public License for more details.`
			`*/`
			`#ifndef __ASM_X86_PMEM_H__`
			`#define __ASM_X86_PMEM_H__`

			`#include <linux/uaccess.h>`
			`#include <asm/cacheflush.h>`
			`#include <asm/cpufeature.h>`
			`#include <asm/special_insns.h>`

nd_blk: change aperture mapping from WC to WB This should result in a pretty sizeable performance gain for reads. For rough comparison I did some simple read testing using PMEM to compare reads of write combining (WC) mappings vs write-back (WB). This was done on a random lab machine. PMEM reads from a write combining mapping: # dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000 100000+0 records in 100000+0 records out 409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s PMEM reads from a write-back mapping: # dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000 1000000+0 records in 1000000+0 records out 4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s To be able to safely support a write-back aperture I needed to add support for the "read flush" _DSM flag, as outlined in the DSM spec: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf This flag tells the ND BLK driver that it needs to flush the cache lines associated with the aperture after the aperture is moved but before any new data is read. This ensures that any stale cache lines from the previous contents of the aperture will be discarded from the processor cache, and the new data will be read properly from the DIMM. We know that the cache lines are clean and will be discarded without any writeback because either a) the previous aperture operation was a read, and we never modified the contents of the aperture, or b) the previous aperture operation was a write and we must have written back the dirtied contents of the aperture to the DIMM before the I/O was completed. In order to add support for the "read flush" flag I needed to add a generic routine to invalidate cache lines, mmio_flush_range(). This is protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently only supported on x86. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-28 03:14:20 +08:00			`#define ARCH_MEMREMAP_PMEM MEMREMAP_WB`

pmem, x86: clean up conditional pmem includes Prior to this change x86_64 used the pmem defines in arch/x86/include/asm/pmem.h, and UM used the default ones at the top of include/linux/pmem.h. The inclusion or exclusion in linux/pmem.h was controlled by CONFIG_ARCH_HAS_PMEM_API, but the ones in asm/pmem.h were controlled by ARCH_HAS_NOCACHE_UACCESS. Instead, control them both with CONFIG_ARCH_HAS_PMEM_API so that it's clear that they are related and we don't run into the possibility where they are both included or excluded. Also remove a bunch of stale function prototypes meant for UM in asm/pmem.h - these just conflicted with the inline defaults in linux/pmem.h and gave compile errors. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:38 +08:00			`#ifdef CONFIG_ARCH_HAS_PMEM_API`
pmem, x86: move x86 PMEM API to new pmem.h header Move the x86 PMEM API implementation out of asm/cacheflush.h and into its own header asm/pmem.h. This will allow members of the PMEM API to be more easily identified on this and other architectures. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Suggested-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:36 +08:00			`/**`
			`* arch_memcpy_to_pmem - copy data to persistent memory`
			`* @dst: destination buffer for the copy`
			`* @src: source buffer for the copy`
			`* @n: length of the copy in bytes`
			`*`
			`* Copy data to persistent memory media via non-temporal stores so that`
			`* a subsequent arch_wmb_pmem() can flush cpu and memory controller`
			`* write buffers to guarantee durability.`
			`*/`
			`static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,`
			`size_t n)`
			`{`
			`int unwritten;`

			`/*`
			`* We are copying between two kernel buffers, if`
			`* __copy_from_user_inatomic_nocache() returns an error (page`
			`* fault) we would have already reported a general protection fault`
			`* before the WARN+BUG.`
			`*/`
			`unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,`
			`(void __user *) src, n);`
			`if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",`
			`__func__, dst, src, unwritten))`
			`BUG();`
			`}`

			`/**`
			`* arch_wmb_pmem - synchronize writes to persistent memory`
			`*`
			`* After a series of arch_memcpy_to_pmem() operations this drains data`
			`* from cpu write buffers and any platform (memory controller) buffers`
			`* to ensure that written data is durable on persistent memory media.`
			`*/`
			`static inline void arch_wmb_pmem(void)`
			`{`
			`/*`
			`* wmb() to 'sfence' all previous writes such that they are`
			`* architecturally visible to 'pcommit'. Note, that we've`
			`* already arranged for pmem writes to avoid the cache via`
			`* arch_memcpy_to_pmem().`
			`*/`
			`wmb();`
			`pcommit_sfence();`
			`}`

pmem: add copy_from_iter_pmem() and clear_pmem() Add support for two new PMEM APIs, copy_from_iter_pmem() and clear_pmem(). copy_from_iter_pmem() is used to copy data from an iterator into a PMEM buffer. clear_pmem() zeros a PMEM memory range. Both of these new APIs must be explicitly ordered using a wmb_pmem() function call and are implemented in such a way that the wmb_pmem() will make the stores to PMEM durable. Because both APIs are unordered they can be called as needed without introducing any unwanted memory barriers. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:39 +08:00			`/**`
			`* __arch_wb_cache_pmem - write back a cache range with CLWB`
			`* @vaddr: virtual start address`
			`* @size: number of bytes to write back`
			`*`
			`* Write back a cache range using the CLWB (cache line write back)`
			`* instruction. This function requires explicit ordering with an`
			`* arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.`
			`*/`
			`static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)`
			`{`
			`u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;`
			`unsigned long clflush_mask = x86_clflush_size - 1;`
			`void *vend = vaddr + size;`
			`void *p;`

			`for (p = (void *)((unsigned long)vaddr & ~clflush_mask);`
			`p < vend; p += x86_clflush_size)`
			`clwb(p);`
			`}`

			`/*`
			`* copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec`
			`* iterators, so for other types (bvec & kvec) we must do a cache write-back.`
			`*/`
			`static inline bool __iter_needs_pmem_wb(struct iov_iter *i)`
			`{`
			`return iter_is_iovec(i) == false;`
			`}`

			`/**`
			`* arch_copy_from_iter_pmem - copy data from an iterator to PMEM`
			`* @addr: PMEM destination address`
			`* @bytes: number of bytes to copy`
			`* @i: iterator with source data`
			`*`
			`* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.`
			`* This function requires explicit ordering with an arch_wmb_pmem() call.`
			`*/`
			`static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,`
			`struct iov_iter *i)`
			`{`
			`void *vaddr = (void __force *)addr;`
			`size_t len;`

			`/* TODO: skip the write-back by always using non-temporal stores */`
			`len = copy_from_iter_nocache(vaddr, bytes, i);`

			`if (__iter_needs_pmem_wb(i))`
			`__arch_wb_cache_pmem(vaddr, bytes);`

			`return len;`
			`}`

			`/**`
			`* arch_clear_pmem - zero a PMEM memory range`
			`* @addr: virtual start address`
			`* @size: number of bytes to zero`
			`*`
			`* Write zeros into the memory range starting at 'addr' for 'size' bytes.`
			`* This function requires explicit ordering with an arch_wmb_pmem() call.`
			`*/`
			`static inline void arch_clear_pmem(void __pmem *addr, size_t size)`
			`{`
			`void *vaddr = (void __force *)addr;`

			`/* TODO: implement the zeroing via non-temporal writes */`
			`if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0)`
			`clear_page(vaddr);`
			`else`
			`memset(vaddr, 0, size);`

			`__arch_wb_cache_pmem(vaddr, size);`
			`}`

pmem: remove layer when calling arch_has_wmb_pmem() Prior to this change arch_has_wmb_pmem() was only called by arch_has_pmem_api(). Both arch_has_wmb_pmem() and arch_has_pmem_api() checked to make sure that CONFIG_ARCH_HAS_PMEM_API was enabled. Instead, remove the old arch_has_wmb_pmem() wrapper to be rid of one extra layer of indirection and the redundant CONFIG_ARCH_HAS_PMEM_API check. Rename __arch_has_wmb_pmem() to arch_has_wmb_pmem() since we no longer have a wrapper, and just have arch_has_pmem_api() call the architecture specific arch_has_wmb_pmem() directly. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:37 +08:00			`static inline bool arch_has_wmb_pmem(void)`
pmem, x86: move x86 PMEM API to new pmem.h header Move the x86 PMEM API implementation out of asm/cacheflush.h and into its own header asm/pmem.h. This will allow members of the PMEM API to be more easily identified on this and other architectures. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Suggested-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:36 +08:00			`{`
			`#ifdef CONFIG_X86_64`
			`/*`
			`* We require that wmb() be an 'sfence', that is only guaranteed on`
			`* 64-bit builds`
			`*/`
			`return static_cpu_has(X86_FEATURE_PCOMMIT);`
			`#else`
			`return false;`
			`#endif`
			`}`
pmem, x86: clean up conditional pmem includes Prior to this change x86_64 used the pmem defines in arch/x86/include/asm/pmem.h, and UM used the default ones at the top of include/linux/pmem.h. The inclusion or exclusion in linux/pmem.h was controlled by CONFIG_ARCH_HAS_PMEM_API, but the ones in asm/pmem.h were controlled by ARCH_HAS_NOCACHE_UACCESS. Instead, control them both with CONFIG_ARCH_HAS_PMEM_API so that it's clear that they are related and we don't run into the possibility where they are both included or excluded. Also remove a bunch of stale function prototypes meant for UM in asm/pmem.h - these just conflicted with the inline defaults in linux/pmem.h and gave compile errors. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:38 +08:00			`#endif /* CONFIG_ARCH_HAS_PMEM_API */`
pmem, x86: move x86 PMEM API to new pmem.h header Move the x86 PMEM API implementation out of asm/cacheflush.h and into its own header asm/pmem.h. This will allow members of the PMEM API to be more easily identified on this and other architectures. Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Suggested-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com> 2015-08-19 03:55:36 +08:00
			`#endif /* __ASM_X86_PMEM_H__ */`