hygon: support SSE2 instructions to accelerate memory copy

Add support for using the FPU in kernel non-atomic context, and use an SSE2 non-temporal memcpy in place of copy_user_generic_string for large copies.

Signed-off-by: yuehongwu <yuehongwu@tencent.com>
Reviewed-by: caelli <caelli@tencent.com>
Signed-off-by: Jianping Liu <frankjpliu@tencent.com>
commit 183ff542e7 (parent 949978bef2)
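For orientation before the diff: the patch adds a begin/end pair that is only usable in non-atomic task context, and copy_user_generic() (see the uaccess_64.h hunk below) wraps the new non-temporal copy in it. The sketch below condenses that pattern; only kernel_fpu_begin_nonatomic(), kernel_fpu_end_nonatomic() and copy_user_large_memory_generic_string() come from the patch, while the wrapper function and the PAGE_SIZE threshold are illustrative assumptions.

/* Illustrative sketch, not part of the patch. */
static unsigned long copy_large_block(void *to, const void *from, unsigned len)
{
        unsigned long left;

        /* Returns 0 only in non-atomic task context (no nesting, no IRQ, no kthread). */
        if (len >= PAGE_SIZE && !kernel_fpu_begin_nonatomic()) {
                /* SSE2/AVX2 non-temporal copy routine added by this patch. */
                left = copy_user_large_memory_generic_string(to, from, len);
                kernel_fpu_end_nonatomic();
                return left;
        }

        /* Otherwise fall back to the existing string copy. */
        return copy_user_generic_string(to, from, len);
}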
@@ -866,6 +866,7 @@ config ACRN_GUEST
endif #HYPERVISOR_GUEST

source "arch/x86/Kconfig.cpu"
source "arch/x86/Kconfig.fpu"

config HPET_TIMER
        def_bool X86_64

@@ -0,0 +1,22 @@
# SPDX-License-Identifier: GPL-2.0

config USING_FPU_IN_KERNEL_NONATOMIC
        bool "Support using fpu instructions in kernel non-atomic context"
        depends on X86_64 && CPU_SUP_HYGON
        help
          When this feature is enabled, fpu instructions can be used in
          kernel non-atomic context.

config USING_SSE2_FOR_LARGE_MEMORY_COPY
        bool "Use sse2 nt copy for large memory copy"
        depends on USING_FPU_IN_KERNEL_NONATOMIC
        help
          When this feature is enabled, copy_user_sse2_nt_string is used
          for large memory copies.

config USING_AVX2_FOR_LARGE_MEMORY_COPY
        bool "Use avx2 nt copy for large memory copy"
        depends on USING_FPU_IN_KERNEL_NONATOMIC
        help
          When this feature is enabled, copy_user_avx2_nt_string is used
          for large memory copies.

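To make the Kconfig help concrete: "nt copy" means copying with non-temporal (streaming) stores that bypass the cache, which helps for blocks too large to stay cache-resident. Below is a minimal userspace sketch of the idea using SSE2 intrinsics; it is an illustration only (it assumes a 16-byte-aligned destination), while the kernel routines added later in this patch are hand-written assembly with user-access fault handling and alignment fixup.

#include <emmintrin.h>  /* SSE2 intrinsics */
#include <string.h>

/* Copy len bytes with non-temporal stores; dst is assumed 16-byte aligned. */
static void nt_copy(void *dst, const void *src, size_t len)
{
        char *d = dst;
        const char *s = src;

        while (len >= 64) {
                __m128i x0 = _mm_loadu_si128((const __m128i *)(s + 0));
                __m128i x1 = _mm_loadu_si128((const __m128i *)(s + 16));
                __m128i x2 = _mm_loadu_si128((const __m128i *)(s + 32));
                __m128i x3 = _mm_loadu_si128((const __m128i *)(s + 48));

                _mm_stream_si128((__m128i *)(d + 0), x0);   /* bypass the cache */
                _mm_stream_si128((__m128i *)(d + 16), x1);
                _mm_stream_si128((__m128i *)(d + 32), x2);
                _mm_stream_si128((__m128i *)(d + 48), x3);
                s += 64;
                d += 64;
                len -= 64;
        }
        _mm_sfence();           /* make the streaming stores globally visible */
        memcpy(d, s, len);      /* ordinary copy for the tail */
}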
@@ -35,6 +35,29 @@ static inline void kernel_fpu_begin(void)
        kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
}

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
extern int kernel_fpu_begin_nonatomic_mask(unsigned int kfpu_mask);
extern void kernel_fpu_end_nonatomic(void);

/* Code that is unaware of kernel_fpu_begin_nonatomic_mask() can use this */
static inline int kernel_fpu_begin_nonatomic(void)
{
#ifdef CONFIG_X86_64
        /*
         * Any 64-bit code that uses 387 instructions must explicitly request
         * KFPU_387.
         */
        return kernel_fpu_begin_nonatomic_mask(KFPU_MXCSR);
#else
        /*
         * 32-bit kernel code may use 387 operations as well as SSE2, etc,
         * as long as it checks that the CPU has the required capability.
         */
        return kernel_fpu_begin_nonatomic_mask(KFPU_387 | KFPU_MXCSR);
#endif
}
#endif /* CONFIG_USING_FPU_IN_KERNEL_NONATOMIC */

/*
 * Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
 * A context switch will (and softirq might) save CPU's FPU registers to

@@ -616,6 +616,53 @@ static inline void switch_fpu_finish(struct task_struct *next)
        __write_pkru(pkru_val);
}

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
/*
 * Kernel FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_kernel_fpu_prepare() saves the old task's kernel fpu state.
 *    This is done within the context of the old process.
 *
 *  - switch_kernel_fpu_finish() restores the new task's kernel fpu state.
 *
 * The kernel FPU context is only stored/restored for a user task in kernel
 * mode and PF_KTHREAD is used to distinguish between kernel and user threads.
 */

extern void save_fpregs_to_fpkernelstate(struct fpu *kfpu);
static inline void switch_kernel_fpu_prepare(struct task_struct *prev, int cpu)
{
        struct fpu *old_fpu = &prev->thread.fpu;

        if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) {
                save_fpregs_to_fpkernelstate(old_fpu);
        }
}

/* Internal helper for switch_kernel_fpu_finish() and signal frame setup */
static inline void fpregs_restore_kernelregs(struct fpu *kfpu)
{
        kernel_fpu_states_restore(NULL, &kfpu->kernel_state, sizeof(kfpu->kernel_state));
}

/*
 * Loading of the complete FPU state immediately.
 */
static inline void switch_kernel_fpu_finish(struct task_struct *next)
{
        struct fpu *new_fpu = &next->thread.fpu;
        if (next->flags & PF_KTHREAD)
                return;

        if (cpu_feature_enabled(X86_FEATURE_FPU)
            && test_ti_thread_flag((struct thread_info *)next,
                                   TIF_USING_FPU_NONATOMIC))
                fpregs_restore_kernelregs(new_fpu);
}
#endif

/*
 * MXCSR and XCR definitions:
 */

@@ -300,6 +300,9 @@ struct fpu {
         */
        unsigned long                   avx512_timestamp;

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
        union fpregs_state              kernel_state;
#endif
        /*
         * @state:
         *

@@ -98,6 +98,7 @@ struct thread_info {
#define TIF_IO_BITMAP           22      /* uses I/O bitmap */
#define TIF_FORCED_TF           24      /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP           25      /* set when we want DEBUGCTLMSR_BTF */
#define TIF_USING_FPU_NONATOMIC 26      /* using fpu in kernel non-atomic context */
#define TIF_LAZY_MMU_UPDATES    27      /* task is updating the mmu lazily */
#define TIF_SYSCALL_TRACEPOINT  28      /* syscall tracepoint instrumentation */
#define TIF_ADDR32              29      /* 32-bit address space on 64 bits */

@@ -11,6 +11,9 @@
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/page.h>
#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
#include <asm/fpu/api.h>
#endif

/*
 * Copy To/From Userspace
@@ -24,10 +27,55 @@ copy_user_generic_string(void *to, const void *from, unsigned len);
__must_check unsigned long
copy_user_generic_unrolled(void *to, const void *from, unsigned len);

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
#ifdef CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY
void fpu_save_xmm0_3(void *to, const void *from, unsigned len);
void fpu_restore_xmm0_3(void *to, const void *from, unsigned len);

#define kernel_fpu_states_save          fpu_save_xmm0_3
#define kernel_fpu_states_restore       fpu_restore_xmm0_3

__must_check unsigned long
copy_user_sse2_opt_string(void *to, const void *from, unsigned len);

#define copy_user_large_memory_generic_string   copy_user_sse2_opt_string

#endif /* CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY */

#ifdef CONFIG_USING_AVX2_FOR_LARGE_MEMORY_COPY
#ifndef CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY
void fpu_save_ymm0_7(void *to, const void *from, unsigned len);
void fpu_restore_ymm0_7(void *to, const void *from, unsigned len);

#define kernel_fpu_states_save          fpu_save_ymm0_7
#define kernel_fpu_states_restore       fpu_restore_ymm0_7

__must_check unsigned long
copy_user_avx2_pf64_nt_string(void *to, const void *from, unsigned len);

#define copy_user_large_memory_generic_string   copy_user_avx2_pf64_nt_string
#endif /* !CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY */
#endif /* CONFIG_USING_AVX2_FOR_LARGE_MEMORY_COPY */
unsigned int get_nt_block_copy_mini_len(void);
#endif /* CONFIG_USING_FPU_IN_KERNEL_NONATOMIC */

static __always_inline __must_check unsigned long
copy_user_generic(void *to, const void *from, unsigned len)
{
        unsigned ret;
#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
#if defined(CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY) || defined(CONFIG_USING_AVX2_FOR_LARGE_MEMORY_COPY)
        unsigned int nt_blk_cpy_mini_len = get_nt_block_copy_mini_len();
        if (nt_blk_cpy_mini_len && (nt_blk_cpy_mini_len <= len)
            && (system_state == SYSTEM_RUNNING)
            && (!kernel_fpu_begin_nonatomic())) {
                ret = copy_user_large_memory_generic_string(to, from, len);
                kernel_fpu_end_nonatomic();

                return ret;
        }
#endif
#endif

        /*
         * If CPU has ERMS feature, use copy_user_enhanced_fast_string.

@@ -13,6 +13,10 @@
#include <asm/cacheinfo.h>
#include <asm/spec-ctrl.h>
#include <asm/delay.h>
#include <asm/page.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_X86_64
# include <asm/set_memory.h>
#endif

@@ -410,3 +414,173 @@ static const struct cpu_dev hygon_cpu_dev = {
};

cpu_dev_register(hygon_cpu_dev);

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
#if defined(CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY) || defined(CONFIG_USING_AVX2_FOR_LARGE_MEMORY_COPY)
struct hygon_c86_info {
        unsigned int nt_cpy_mini_len;
        unsigned int nt_cpy_to_user_mini_nr_pages;
        unsigned int nt_cpy_from_user_mini_nr_pages;
};

static struct hygon_c86_info hygon_c86_data = {
        .nt_cpy_mini_len = PAGE_SIZE,
        .nt_cpy_to_user_mini_nr_pages = 3,
        .nt_cpy_from_user_mini_nr_pages = 2
};

void set_c86_features_para_invaild(void)
{
        memset((void *)&hygon_c86_data, 0, sizeof(struct hygon_c86_info));
}

unsigned int get_nt_block_copy_mini_len(void)
{
        return hygon_c86_data.nt_cpy_mini_len;
}
EXPORT_SYMBOL_GPL(get_nt_block_copy_mini_len);

unsigned int get_nt_block_copy_to_user_mini_nr_pages(void)
{
        return hygon_c86_data.nt_cpy_to_user_mini_nr_pages;
}
EXPORT_SYMBOL_GPL(get_nt_block_copy_to_user_mini_nr_pages);

unsigned int get_nt_block_copy_from_user_mini_nr_pages(void)
{
        return hygon_c86_data.nt_cpy_from_user_mini_nr_pages;
}
EXPORT_SYMBOL_GPL(get_nt_block_copy_from_user_mini_nr_pages);

static ssize_t show_nt_cpy_mini_len(struct kobject *kobj,
                                    struct kobj_attribute *attr,
                                    char *buf)
{
        return snprintf(buf, 40, "%d\n", hygon_c86_data.nt_cpy_mini_len);
}

static ssize_t store_nt_cpy_mini_len(struct kobject *kobj,
                                     struct kobj_attribute *attr,
                                     const char *buf, size_t count)
{
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul(buf, 0, &val);
        if (ret)
                return ret;

        hygon_c86_data.nt_cpy_mini_len = val;

        return count;
}

static ssize_t show_nt_cpy_to_user_mini_nr_pages(struct kobject *kobj,
                                                 struct kobj_attribute *attr,
                                                 char *buf)
{
        return snprintf(buf, 40, "%d\n", hygon_c86_data.nt_cpy_to_user_mini_nr_pages);
}

static ssize_t store_nt_cpy_to_user_mini_nr_pages(struct kobject *kobj,
                                                  struct kobj_attribute *attr,
                                                  const char *buf, size_t count)
{
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul(buf, 0, &val);
        if (ret)
                return ret;

        hygon_c86_data.nt_cpy_to_user_mini_nr_pages = val;

        return count;
}

static ssize_t show_nt_cpy_from_user_mini_nr_pages(struct kobject *kobj,
                                                   struct kobj_attribute *attr,
                                                   char *buf)
{
        return snprintf(buf, 40, "%d\n", hygon_c86_data.nt_cpy_from_user_mini_nr_pages);
}

static ssize_t store_nt_cpy_from_user_mini_nr_pages(struct kobject *kobj,
                                                    struct kobj_attribute *attr,
                                                    const char *buf, size_t count)
{
        unsigned long val;
        ssize_t ret;

        ret = kstrtoul(buf, 0, &val);
        if (ret)
                return ret;

        hygon_c86_data.nt_cpy_from_user_mini_nr_pages = val;

        return count;
}

static struct kobj_attribute nt_cpy_mini_len_attribute =
        __ATTR(nt_cpy_mini_len, S_IRUSR | S_IWUSR,
               show_nt_cpy_mini_len,
               store_nt_cpy_mini_len);
static struct kobj_attribute nt_cpy_to_user_mini_nr_pages_attribute =
        __ATTR(nt_cpy_to_user_mini_nr_pages, S_IRUSR | S_IWUSR,
               show_nt_cpy_to_user_mini_nr_pages,
               store_nt_cpy_to_user_mini_nr_pages);
static struct kobj_attribute nt_cpy_from_user_mini_nr_pages_attribute =
        __ATTR(nt_cpy_from_user_mini_nr_pages, S_IRUSR | S_IWUSR,
               show_nt_cpy_from_user_mini_nr_pages,
               store_nt_cpy_from_user_mini_nr_pages);

static struct attribute *c86_default_attrs[] = {
        &nt_cpy_mini_len_attribute.attr,
        &nt_cpy_to_user_mini_nr_pages_attribute.attr,
        &nt_cpy_from_user_mini_nr_pages_attribute.attr,
        NULL
};

const struct attribute_group hygon_c86_attr_group = {
        .attrs = c86_default_attrs,
        .name = "hygon_c86",
};

static struct kobject *c86_features_kobj;
static int __init kobject_hygon_c86_init(void)
{
        int ret;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
                goto err_out;

        c86_features_kobj = kobject_create_and_add("c86_features", NULL);

        if (c86_features_kobj) {
                ret = sysfs_create_group(c86_features_kobj, &hygon_c86_attr_group);
                if (ret)
                        goto err_out;
        }

        return 0;
err_out:
        set_c86_features_para_invaild();
        if (c86_features_kobj) {
                sysfs_remove_group(c86_features_kobj, &hygon_c86_attr_group);
                kobject_del(c86_features_kobj);
        }

        return -1;
}
module_init(kobject_hygon_c86_init);

static void __exit kobject_hygon_c86_exit(void)
{
        if (c86_features_kobj) {
                sysfs_remove_group(c86_features_kobj, &hygon_c86_attr_group);
                kobject_del(c86_features_kobj);
        }
}
module_exit(kobject_hygon_c86_exit);
#endif
#endif

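The hunk above exposes the copy thresholds as writable sysfs attributes. A small userspace sketch of reading and tuning one of them follows; the /sys/c86_features/hygon_c86/ path is an assumption inferred from kobject_create_and_add("c86_features", NULL) plus the group name "hygon_c86", and the value written is only an example.

#include <stdio.h>

/* Assumed path; verify on the target system. */
#define NT_CPY_MINI_LEN "/sys/c86_features/hygon_c86/nt_cpy_mini_len"

int main(void)
{
        unsigned long val;
        FILE *f = fopen(NT_CPY_MINI_LEN, "r");

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%lu", &val) == 1)
                printf("nt_cpy_mini_len = %lu\n", val);
        fclose(f);

        /* Raise the threshold to two pages (8192 bytes); needs root. */
        f = fopen(NT_CPY_MINI_LEN, "w");
        if (!f) {
                perror("fopen");
                return 1;
        }
        fprintf(f, "8192\n");
        fclose(f);
        return 0;
}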
@@ -86,6 +86,14 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
        preempt_disable();

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
        /*
         * Catch kernel_fpu_begin() being called after
         * kernel_fpu_begin_nonatomic() but before kernel_fpu_end_nonatomic().
         */
        WARN_ON_FPU(test_thread_flag(TIF_USING_FPU_NONATOMIC));
#endif

        WARN_ON_FPU(!irq_fpu_usable());
        WARN_ON_FPU(this_cpu_read(in_kernel_fpu));

@@ -115,11 +123,96 @@ void kernel_fpu_end(void)
{
        WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
        /*
         * Catch kernel_fpu_end() being called after
         * kernel_fpu_begin_nonatomic() but before kernel_fpu_end_nonatomic().
         */
        WARN_ON_FPU(test_thread_flag(TIF_USING_FPU_NONATOMIC));
#endif

        this_cpu_write(in_kernel_fpu, false);
        preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
/*
 * kernel_fpu_begin_nonatomic_mask() is meant to be called from non-atomic
 * task context only.
 */
int kernel_fpu_begin_nonatomic_mask(unsigned int kfpu_mask)
{
        preempt_disable();

        /* Nested calls are not supported */
        if (test_thread_flag(TIF_USING_FPU_NONATOMIC))
                goto nested_err;

        /*
         * Catch kernel_fpu_begin_nonatomic() being called after
         * kernel_fpu_begin() but before kernel_fpu_end().
         */
        if (this_cpu_read(in_kernel_fpu))
                goto nested_err;

        if (in_interrupt())
                goto irq_err;

        if (current->flags & PF_KTHREAD)
                goto err;

        if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
                set_thread_flag(TIF_NEED_FPU_LOAD);
                copy_fpregs_to_fpstate(&current->thread.fpu);
        }

        /* Set thread flag: TIF_USING_FPU_NONATOMIC */
        set_thread_flag(TIF_USING_FPU_NONATOMIC);

        __cpu_invalidate_fpregs_state();

        /* Put sane initial values into the control registers. */
        if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
                ldmxcsr(MXCSR_DEFAULT);

        if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
                asm volatile ("fninit");

        preempt_enable();

        return 0;

nested_err:
irq_err:
err:
        preempt_enable();

        return -1;
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin_nonatomic_mask);

void kernel_fpu_end_nonatomic(void)
{
        preempt_disable();
        /*
         * Catch kernel_fpu_end_nonatomic() being called after
         * kernel_fpu_begin() but before kernel_fpu_end().
         */
        WARN_ON_FPU(this_cpu_read(in_kernel_fpu));

        WARN_ON_FPU(!test_thread_flag(TIF_USING_FPU_NONATOMIC));

        clear_thread_flag(TIF_USING_FPU_NONATOMIC);
        preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end_nonatomic);

void save_fpregs_to_fpkernelstate(struct fpu *kfpu)
{
        kernel_fpu_states_save(&kfpu->kernel_state, NULL, sizeof(kfpu->kernel_state));
}
#endif /* CONFIG_USING_FPU_IN_KERNEL_NONATOMIC */

/*
 * Save the FPU state (mark it for reload if necessary):
 *

@@ -545,6 +545,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        if (!test_thread_flag(TIF_NEED_FPU_LOAD))
                switch_fpu_prepare(prev_p, cpu);

#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
        if (test_thread_flag(TIF_USING_FPU_NONATOMIC))
                switch_kernel_fpu_prepare(prev_p, cpu);
#endif
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *

@@ -597,7 +601,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

        switch_fpu_finish(next_p);

        /* Reload sp0. */
#ifdef CONFIG_USING_FPU_IN_KERNEL_NONATOMIC
        switch_kernel_fpu_finish(next_p);
#endif

        /* Reload sp0. */
        update_task_stack(next_p);

        switch_to_extra(prev_p, next_p);

@@ -59,5 +59,7 @@ else
        lib-y += clear_page_64.o copy_page_64.o
        lib-y += memmove_64.o memset_64.o
        lib-y += copy_user_64.o
        lib-$(CONFIG_USING_SSE2_FOR_LARGE_MEMORY_COPY) += copy_user_sse2.o
        lib-$(CONFIG_USING_AVX2_FOR_LARGE_MEMORY_COPY) += copy_user_avx2.o
        lib-y += cmpxchg16b_emu.o
endif

@@ -0,0 +1,322 @@
/*
 * Copyright © 2011 Siarhei Siamashka <siarhei.siamashka@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

#define PREFETCH_DISTANCE 64
//#define PREFETCH_DISTANCE 128
//#define PREFETCH_DISTANCE 192
//#define PREFETCH_DISTANCE 256

#define X86_NON_TEMPORAL_THRESHOLD 4095
//#define X86_NON_TEMPORAL_THRESHOLD 1000000

#define PREFETCH(addr) prefetchnta addr

.macro ALIGN_DESTINATION_32
        /* Align the destination to 32 bytes; the vmovntdq stores below need it */
        /* If fewer than 32 bytes remain, skip the alignment (302f) */
        cmpl $32, %edx
        jb 302f

        movl %edi, %ecx
        andl $31, %ecx
        jz 302f                 /* already aligned */

        subl $32, %ecx
        negl %ecx
        subl %ecx, %edx

300:
        movb (%rsi), %al
301:
        movb %al, (%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 300b
302:

        .section .fixup,"ax"
303:
        addl %ecx,%edx          /* ecx is zerorest also */
        jmp .Lavx2_copy_user_handle_tail
        .previous

        _ASM_EXTABLE_UA(300b, 303b)
        _ASM_EXTABLE_UA(301b, 303b)
.endm

/*
 * Large block copy using AVX2 non-temporal stores plus prefetchnta.
 */
SYM_FUNC_START(copy_user_avx2_pf64_nt_string)
        ASM_STAC
        ALIGN_DESTINATION_32

        /* len >= 256: use the 256-bytes-per-iteration NT loop */
        cmpl $256, %edx
        jb .Lless_than_256_bytes_cpy

        movl %esi, %ecx         /* check if src is aligned */
        andl $31, %ecx
        jnz large_block_nt_unaligned_cpy

large_block_nt_aligned_cpy:
        PREFETCH(PREFETCH_DISTANCE(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 64)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 128)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 192)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 256)(%rsi))

32:
        vmovdqa 0(%rsi), %ymm0
33:
        vmovdqa 32(%rsi), %ymm1
34:
        vmovdqa 64(%rsi), %ymm2
35:
        vmovdqa 96(%rsi), %ymm3
36:
        vmovdqa 128(%rsi), %ymm4
37:
        vmovdqa 160(%rsi), %ymm5
38:
        vmovdqa 192(%rsi), %ymm6
39:
        vmovdqa 224(%rsi), %ymm7

40:
        vmovntdq %ymm0, 0(%rdi)
41:
        vmovntdq %ymm1, 32(%rdi)
42:
        vmovntdq %ymm2, 64(%rdi)
43:
        vmovntdq %ymm3, 96(%rdi)
44:
        vmovntdq %ymm4, 128(%rdi)
45:
        vmovntdq %ymm5, 160(%rdi)
46:
        vmovntdq %ymm6, 192(%rdi)
47:
        vmovntdq %ymm7, 224(%rdi)

        add $256, %rsi
        add $256, %rdi
        subl $256, %edx
        cmpl $256, %edx
        jg large_block_nt_aligned_cpy

        vzeroupper
        sfence
        jmp .Lless_than_256_bytes_cpy

large_block_nt_unaligned_cpy:
        PREFETCH(PREFETCH_DISTANCE(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 64)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 128)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 192)(%rsi))
        PREFETCH((PREFETCH_DISTANCE + 256)(%rsi))

48:
        vmovdqu 0(%rsi), %ymm0
49:
        vmovdqu 32(%rsi), %ymm1
50:
        vmovdqu 64(%rsi), %ymm2
51:
        vmovdqu 96(%rsi), %ymm3
52:
        vmovdqu 128(%rsi), %ymm4
53:
        vmovdqu 160(%rsi), %ymm5
54:
        vmovdqu 192(%rsi), %ymm6
55:
        vmovdqu 224(%rsi), %ymm7

56:
        vmovntdq %ymm0, 0(%rdi)
57:
        vmovntdq %ymm1, 32(%rdi)
58:
        vmovntdq %ymm2, 64(%rdi)
59:
        vmovntdq %ymm3, 96(%rdi)
60:
        vmovntdq %ymm4, 128(%rdi)
61:
        vmovntdq %ymm5, 160(%rdi)
62:
        vmovntdq %ymm6, 192(%rdi)
63:
        vmovntdq %ymm7, 224(%rdi)

        add $256, %rsi
        add $256, %rdi
        subl $256, %edx
        cmpl $256, %edx
        jg large_block_nt_unaligned_cpy

        vzeroupper
        sfence
        jmp .Lless_than_256_bytes_cpy

        .section .fixup,"ax"

88:
        vzeroupper
        jmp .Lavx2_copy_user_handle_tail
        .previous

        _ASM_EXTABLE_UA(32b, 88b)
        _ASM_EXTABLE_UA(33b, 88b)
        _ASM_EXTABLE_UA(34b, 88b)
        _ASM_EXTABLE_UA(35b, 88b)
        _ASM_EXTABLE_UA(36b, 88b)
        _ASM_EXTABLE_UA(37b, 88b)
        _ASM_EXTABLE_UA(38b, 88b)
        _ASM_EXTABLE_UA(39b, 88b)

        _ASM_EXTABLE_UA(40b, 88b)
        _ASM_EXTABLE_UA(41b, 88b)
        _ASM_EXTABLE_UA(42b, 88b)
        _ASM_EXTABLE_UA(43b, 88b)
        _ASM_EXTABLE_UA(44b, 88b)
        _ASM_EXTABLE_UA(45b, 88b)
        _ASM_EXTABLE_UA(46b, 88b)
        _ASM_EXTABLE_UA(47b, 88b)
        _ASM_EXTABLE_UA(48b, 88b)
        _ASM_EXTABLE_UA(49b, 88b)

        _ASM_EXTABLE_UA(50b, 88b)
        _ASM_EXTABLE_UA(51b, 88b)
        _ASM_EXTABLE_UA(52b, 88b)
        _ASM_EXTABLE_UA(53b, 88b)
        _ASM_EXTABLE_UA(54b, 88b)
        _ASM_EXTABLE_UA(55b, 88b)
        _ASM_EXTABLE_UA(56b, 88b)
        _ASM_EXTABLE_UA(57b, 88b)
        _ASM_EXTABLE_UA(58b, 88b)
        _ASM_EXTABLE_UA(59b, 88b)

        _ASM_EXTABLE_UA(60b, 88b)
        _ASM_EXTABLE_UA(61b, 88b)
        _ASM_EXTABLE_UA(62b, 88b)
        _ASM_EXTABLE_UA(63b, 88b)
SYM_FUNC_END(copy_user_avx2_pf64_nt_string)
EXPORT_SYMBOL(copy_user_avx2_pf64_nt_string)

/*
 * If len < 256 bytes, then we use rep movsb directly.
 */
SYM_CODE_START_LOCAL(.Lless_than_256_bytes_cpy)
        movl %edx, %ecx
90:
        rep movsb

        xorl %eax,%eax
        ASM_CLAC
        RET

        .section .fixup,"ax"
99:
        mov %ecx,%eax

        ASM_CLAC
        RET
        .previous

        _ASM_EXTABLE_UA(90b, 99b)
SYM_CODE_END(.Lless_than_256_bytes_cpy)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */

SYM_CODE_START_LOCAL(.Lavx2_copy_user_handle_tail)
        movl %edx,%ecx

1:      rep movsb
2:      mov %ecx,%eax

        ASM_CLAC
        RET

        _ASM_EXTABLE_UA(1b, 2b)
SYM_CODE_END(.Lavx2_copy_user_handle_tail)

/*
 * Called on task switch: fpu_save_ymm0_7 saves the old task's ymm0-ymm7
 * and fpu_restore_ymm0_7 restores the new task's.
 */
SYM_FUNC_START(fpu_restore_ymm0_7)
        vmovdqu 0(%rsi), %ymm0
        vmovdqu 32(%rsi), %ymm1
        vmovdqu 64(%rsi), %ymm2
        vmovdqu 96(%rsi), %ymm3
        vmovdqu 128(%rsi), %ymm4
        vmovdqu 160(%rsi), %ymm5
        vmovdqu 192(%rsi), %ymm6
        vmovdqu 224(%rsi), %ymm7

        xorl %eax,%eax
        RET
SYM_FUNC_END(fpu_restore_ymm0_7)
EXPORT_SYMBOL(fpu_restore_ymm0_7)

SYM_FUNC_START(fpu_save_ymm0_7)
        vmovdqu %ymm0, 0(%rdi)
        vmovdqu %ymm1, 32(%rdi)
        vmovdqu %ymm2, 64(%rdi)
        vmovdqu %ymm3, 96(%rdi)
        vmovdqu %ymm4, 128(%rdi)
        vmovdqu %ymm5, 160(%rdi)
        vmovdqu %ymm6, 192(%rdi)
        vmovdqu %ymm7, 224(%rdi)

        xorl %eax,%eax
        RET
SYM_FUNC_END(fpu_save_ymm0_7)
EXPORT_SYMBOL(fpu_save_ymm0_7)

@@ -0,0 +1,231 @@
/*
 * Copyright © 2011 Siarhei Siamashka <siarhei.siamashka@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

#define PREFETCH_DISTANCE 256

.macro ALIGN_DESTINATION_16
        /* Align the destination to 16 bytes; the movdqa/movntdq below need it */
        /* If fewer than 16 bytes remain, skip the alignment (202f) */
        cmpl $16,%edx
        jb 202f

        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $15,%ecx
        jz 202f                 /* already aligned */

        subl $16,%ecx
        negl %ecx
        subl %ecx,%edx
200:
        movb (%rsi),%al
201:
        movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 200b
202:

        .section .fixup,"ax"
203:
        addl %ecx,%edx          /* ecx is zerorest also */
        jmp .Lsse2_copy_user_handle_tail
        .previous

        _ASM_EXTABLE_UA(200b, 203b)
        _ASM_EXTABLE_UA(201b, 203b)
.endm
/*****************************************************************************/
SYM_FUNC_START(copy_user_sse2_opt_string)
        ASM_STAC
        ALIGN_DESTINATION_16

        cmpl $64,%edx
        jb 70f                  /* less than 64 bytes, avoid the costly 'rep' */

        movl %esi,%ecx          /* check if src is aligned */
        andl $15,%ecx
        jnz 20f

10:
        prefetchnta PREFETCH_DISTANCE(%rsi)
11:
        prefetchnta (PREFETCH_DISTANCE + 32)(%rsi)
12:
        movdqa (%rsi),%xmm0
13:
        movdqa 16(%rsi),%xmm1
14:
        movdqa 32(%rsi),%xmm2
15:
        movdqa 48(%rsi),%xmm3
16:
        movntdq %xmm0,0(%rdi)
17:
        movntdq %xmm1,16(%rdi)
18:
        movntdq %xmm2,32(%rdi)
19:
        movntdq %xmm3,48(%rdi)
        add $64,%rsi
        add $64,%rdi
        subl $64,%edx
        cmpl $64,%edx
        jg 10b
        sfence
        jmp 70f

20:
        prefetchnta PREFETCH_DISTANCE(%rsi)
21:
        prefetchnta (PREFETCH_DISTANCE + 32)(%rsi)
22:
        movdqu (%rsi),%xmm0
23:
        movdqu 16(%rsi),%xmm1
24:
        movdqu 32(%rsi),%xmm2
25:
        movdqu 48(%rsi),%xmm3
26:
        movntdq %xmm0,0(%rdi)
27:
        movntdq %xmm1,16(%rdi)
28:
        movntdq %xmm2,32(%rdi)
29:
        movntdq %xmm3,48(%rdi)
        add $64,%rsi
        add $64,%rdi
        subl $64,%edx
        cmpl $64,%edx
        jg 20b
        sfence

70:
        movl %edx,%ecx
80:
        rep
        movsb

        xorl %eax,%eax
        ASM_CLAC
        RET

        .section .fixup,"ax"
99:
        movl %ecx,%edx          /* ecx is zerorest also */
100:
        sfence
        jmp .Lsse2_copy_user_handle_tail
        .previous

        _ASM_EXTABLE_UA(10b, 100b)
        _ASM_EXTABLE_UA(11b, 100b)
        _ASM_EXTABLE_UA(12b, 100b)
        _ASM_EXTABLE_UA(13b, 100b)
        _ASM_EXTABLE_UA(14b, 100b)
        _ASM_EXTABLE_UA(15b, 100b)
        _ASM_EXTABLE_UA(16b, 100b)
        _ASM_EXTABLE_UA(17b, 100b)
        _ASM_EXTABLE_UA(18b, 100b)
        _ASM_EXTABLE_UA(19b, 100b)

        _ASM_EXTABLE_UA(20b, 100b)
        _ASM_EXTABLE_UA(21b, 100b)
        _ASM_EXTABLE_UA(22b, 100b)
        _ASM_EXTABLE_UA(23b, 100b)
        _ASM_EXTABLE_UA(24b, 100b)
        _ASM_EXTABLE_UA(25b, 100b)
        _ASM_EXTABLE_UA(26b, 100b)
        _ASM_EXTABLE_UA(27b, 100b)
        _ASM_EXTABLE_UA(28b, 100b)
        _ASM_EXTABLE_UA(29b, 100b)

        _ASM_EXTABLE_UA(80b, 99b)
SYM_FUNC_END(copy_user_sse2_opt_string)
EXPORT_SYMBOL(copy_user_sse2_opt_string)

SYM_FUNC_START(fpu_restore_xmm0_3)
        ASM_STAC
        movdqu (%rsi),%xmm0
        movdqu 16(%rsi),%xmm1
        movdqu 32(%rsi),%xmm2
        movdqu 48(%rsi),%xmm3

        xorl %eax,%eax
        ASM_CLAC
        RET
SYM_FUNC_END(fpu_restore_xmm0_3)
EXPORT_SYMBOL(fpu_restore_xmm0_3)

SYM_FUNC_START(fpu_save_xmm0_3)
        ASM_STAC

        movdqu %xmm0,(%rdi)
        movdqu %xmm1,16(%rdi)
        movdqu %xmm2,32(%rdi)
        movdqu %xmm3,48(%rdi)

        xorl %eax,%eax
        ASM_CLAC
        RET
SYM_FUNC_END(fpu_save_xmm0_3)
EXPORT_SYMBOL(fpu_save_xmm0_3)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lsse2_copy_user_handle_tail)
        movl %edx,%ecx
1:      rep movsb
2:      mov %ecx,%eax
        ASM_CLAC
        RET

        _ASM_EXTABLE_UA(1b, 2b)
SYM_CODE_END(.Lsse2_copy_user_handle_tail)

/*****************************************************************************/