x86/asm/64: Use 32-bit XOR to zero registers
Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms. Zeroing
idioms don't require execution bandwidth, as they're being taken care of
in the frontend (through register renaming). Use 32-bit XORs instead.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: davem@davemloft.net
Cc: herbert@gondor.apana.org.au
Cc: pavel@ucw.cz
Cc: rjw@rjwysocki.net
Link: http://lkml.kernel.org/r/5B39FF1A02000078001CFB54@prv1-mh.provo.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
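As an illustration (not part of the patch itself), a minimal sketch of the two forms in AT&T syntax, using %rax/%eax purely as an example register:

	xorq	%rax, %rax	# 3 bytes (needs a REX.W prefix); some Intel CPUs do
				# not treat this as a zeroing idiom, so it occupies an
				# execution port like any other ALU instruction
	xorl	%eax, %eax	# 2 bytes; recognized as a zeroing idiom and resolved
				# at register rename, consuming no execution bandwidth

Because a write to a 32-bit register implicitly clears the upper 32 bits on x86-64, the second form zeroes all of %rax just the same, so the substitution never changes behaviour.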
parent c5fcdbf155
commit a7bea83089
@@ -75,7 +75,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov LEN, %r8
@@ -66,7 +66,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG0, MSG0
 	pxor MSG1, MSG1
 
@@ -59,7 +59,7 @@
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov LEN, %r8
@@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 .macro GCM_INIT Iv SUBKEY AAD AADLEN
 	mov \AADLEN, %r11
 	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
-	xor %r11, %r11
+	xor %r11d, %r11d
 	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
 	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
 	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
 	movdqu HashKey(%arg2), %xmm13
 	add %arg5, InLen(%arg2)
 
-	xor %r11, %r11 # initialise the data pointer offset as zero
+	xor %r11d, %r11d # initialise the data pointer offset as zero
 	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
 
 	sub %r11, %arg5 # sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
 
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 
 	mov %rax, PBlockLen(%arg2)
 	jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
 
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 
 	mov %rax, PBlockLen(%arg2)
 	jmp _encode_done_\@
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
 
 _get_AAD_done\@:
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax # rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
 
 _get_AAD_done\@:
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax # rax = *Y0
@@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	vpxor MSG, MSG, MSG
 
 	mov %rcx, %r8
@@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG_LO, MSG_LO
 	pxor MSG_HI, MSG_HI
 
@@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
  * %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov %rcx, %r8
@@ -96,7 +96,7 @@
 	# cleanup workspace
 	mov $8, %ecx
 	mov %rsp, %rdi
-	xor %rax, %rax
+	xor %eax, %eax
 	rep stosq
 
 	mov %rbp, %rsp # deallocate workspace
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
  * address given in m16:64.
  */
 	pushq $.Lafter_lret	# put return address on stack for unwinder
-	xorq %rbp, %rbp		# clear frame pointer
+	xorl %ebp, %ebp		# clear frame pointer
 	movq initial_code(%rip), %rax
 	pushq $__KERNEL_CS	# set correct cs
 	pushq %rax		# target address in negative space
@@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
 
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
-	xorq %rax, %rax
+	xorl %eax, %eax
 	ret
 ENDPROC(__memcpy_mcsafe)
 EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -137,7 +137,7 @@ ENTRY(restore_registers)
 	/* Saved in save_processor_state. */
 	lgdt saved_context_gdt_desc(%rax)
 
-	xorq %rax, %rax
+	xorl %eax, %eax
 
 	/* tell the hibernation core that we've just restored the memory */
 	movq %rax, in_suspend(%rip)