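# File: OpenCloudOS-Kernel/arch/x86/crypto/salsa20-i586-asm_32.S
# Listing metadata: 939 lines, 16 KiB. The viewer labelled this ArmAsm, but
# the code below is 32-bit x86 assembly in AT&T syntax.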

# Derived from:
# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
#include <linux/linkage.h>
.text
# enter salsa20_encrypt_bytes
#
# salsa20_encrypt_bytes(x, m, out, bytes)
#
# Purpose: XOR a stream of input bytes with keystream generated from a
# 16-word (64-byte) state, one 64-byte block at a time, and advance the
# block counter held in state words 8 and 9.
#
# Arguments are read from the caller stack, relative to %esp at entry:
#   x      at  4(%esp)   pointer to the 16 x uint32 state
#   m      at  8(%esp)   pointer to the input bytes
#   out    at 12(%esp)   pointer to the output bytes
#   bytes  at 16(%esp)   number of bytes to process
#
# Registers: ebx, esi, edi and ebp are saved on entry and restored before
# returning. ecx and edx are overwritten and not restored. eax holds the
# frame size on return, not a result value. Flags are clobbered.
# ebp is used as the round counter, so there is no frame pointer here.
#
# Frame layout, as offsets from the realigned %esp:
#   tmp           0 .. 63    staging area for a final block under 64 bytes
#   x_backup      64         saved state pointer
#   m_backup      68         saved input pointer for the current block
#   out_backup    72         saved output pointer for the current block
#   bytes_backup  76         saved remaining byte count
#   eax_stack     80         amount subtracted from %esp (undone on exit)
#   ebx_stack     84         caller ebx
#   esi_stack     88         caller esi
#   edi_stack     92         caller edi
#   ebp_stack     96         caller ebp
#   x0 .. x15     100 .. 160 working copy of the state, mixed by the rounds
#   j0 .. j15     164 .. 224 unmixed state; j8 and j9 are the block counter
#   ctarget       228        real destination while out points at tmp
#
# Main loop register roles: eax = p, ecx = s, edx = t, ebx = w, esi = r,
# edi = v, ebp = i. Words x0, x5, x10 and x15 stay in p, s, t and w from
# one loop pass to the next.
ENTRY(salsa20_encrypt_bytes)
# Build the frame: eax = (esp mod 32) + 256, so after the subtraction
# %esp is 32-byte aligned with at least 256 bytes reserved below the
# caller frame. eax is kept so the exit path can add the same amount back.
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,80(%esp)
# ebx_stack = ebx
movl %ebx,84(%esp)
# esi_stack = esi
movl %esi,88(%esp)
# edi_stack = edi
movl %edi,92(%esp)
# ebp_stack = ebp
movl %ebp,96(%esp)
# The arguments are addressed as N(%esp,%eax), which is N bytes above the
# value %esp had at entry.
# x = arg1
movl 4(%esp,%eax),%edx
# m = arg2
movl 8(%esp,%eax),%esi
# out = arg3
movl 12(%esp,%eax),%edi
# bytes = arg4
movl 16(%esp,%eax),%ebx
# Subtracting zero only sets the flags and never produces a carry, so the
# branch below is taken exactly when bytes == 0. In that case the state
# at x is left untouched.
# bytes -= 0
sub $0,%ebx
# goto done if unsigned<=
jbe ._done
._start:
# Copy the 16 state words from x into j0 .. j15. Loads and stores are
# interleaved over eax, ecx and ebp.
# in0 = *(uint32 *) (x + 0)
movl 0(%edx),%eax
# in1 = *(uint32 *) (x + 4)
movl 4(%edx),%ecx
# in2 = *(uint32 *) (x + 8)
movl 8(%edx),%ebp
# j0 = in0
movl %eax,164(%esp)
# in3 = *(uint32 *) (x + 12)
movl 12(%edx),%eax
# j1 = in1
movl %ecx,168(%esp)
# in4 = *(uint32 *) (x + 16)
movl 16(%edx),%ecx
# j2 = in2
movl %ebp,172(%esp)
# in5 = *(uint32 *) (x + 20)
movl 20(%edx),%ebp
# j3 = in3
movl %eax,176(%esp)
# in6 = *(uint32 *) (x + 24)
movl 24(%edx),%eax
# j4 = in4
movl %ecx,180(%esp)
# in7 = *(uint32 *) (x + 28)
movl 28(%edx),%ecx
# j5 = in5
movl %ebp,184(%esp)
# in8 = *(uint32 *) (x + 32)
movl 32(%edx),%ebp
# j6 = in6
movl %eax,188(%esp)
# in9 = *(uint32 *) (x + 36)
movl 36(%edx),%eax
# j7 = in7
movl %ecx,192(%esp)
# in10 = *(uint32 *) (x + 40)
movl 40(%edx),%ecx
# j8 = in8
movl %ebp,196(%esp)
# in11 = *(uint32 *) (x + 44)
movl 44(%edx),%ebp
# j9 = in9
movl %eax,200(%esp)
# in12 = *(uint32 *) (x + 48)
movl 48(%edx),%eax
# j10 = in10
movl %ecx,204(%esp)
# in13 = *(uint32 *) (x + 52)
movl 52(%edx),%ecx
# j11 = in11
movl %ebp,208(%esp)
# in14 = *(uint32 *) (x + 56)
movl 56(%edx),%ebp
# j12 = in12
movl %eax,212(%esp)
# in15 = *(uint32 *) (x + 60)
movl 60(%edx),%eax
# j13 = in13
movl %ecx,216(%esp)
# j14 = in14
movl %ebp,220(%esp)
# j15 = in15
movl %eax,224(%esp)
# The state pointer is parked on the stack because edx is reused as t in
# the rounds; it is reloaded when the counter is written back.
# x_backup = x
movl %edx,64(%esp)
# Top of the per-block loop. Here ebx = bytes remaining (nonzero),
# esi = m and edi = out.
._bytesatleast1:
# bytes - 64
cmp $64,%ebx
# goto nocopy if unsigned>=
jae ._nocopy
# Short final block: remember the real destination in ctarget, copy the
# remaining input bytes into tmp, and make both m and out point at tmp so
# the block code below can always touch a full 64 bytes. The bytes of tmp
# beyond the copied ones hold whatever was on the stack; they are
# processed but never copied to the destination.
# ctarget = out
movl %edi,228(%esp)
# out = &tmp
leal 0(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leal 0(%esp),%edi
# m = &tmp
leal 0(%esp),%esi
._nocopy:
# Spill out, m and bytes: esi, edi and ebx are reused as r, v and w by
# the rounds.
# out_backup = out
movl %edi,72(%esp)
# m_backup = m
movl %esi,68(%esp)
# bytes_backup = bytes
movl %ebx,76(%esp)
# Initialise the working state x0 .. x15 from j0 .. j15, four words at a
# time through eax, ecx, edx and ebx.
# in0 = j0
movl 164(%esp),%eax
# in1 = j1
movl 168(%esp),%ecx
# in2 = j2
movl 172(%esp),%edx
# in3 = j3
movl 176(%esp),%ebx
# x0 = in0
movl %eax,100(%esp)
# x1 = in1
movl %ecx,104(%esp)
# x2 = in2
movl %edx,108(%esp)
# x3 = in3
movl %ebx,112(%esp)
# in4 = j4
movl 180(%esp),%eax
# in5 = j5
movl 184(%esp),%ecx
# in6 = j6
movl 188(%esp),%edx
# in7 = j7
movl 192(%esp),%ebx
# x4 = in4
movl %eax,116(%esp)
# x5 = in5
movl %ecx,120(%esp)
# x6 = in6
movl %edx,124(%esp)
# x7 = in7
movl %ebx,128(%esp)
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in10 = j10
movl 204(%esp),%edx
# in11 = j11
movl 208(%esp),%ebx
# x8 = in8
movl %eax,132(%esp)
# x9 = in9
movl %ecx,136(%esp)
# x10 = in10
movl %edx,140(%esp)
# x11 = in11
movl %ebx,144(%esp)
# in12 = j12
movl 212(%esp),%eax
# in13 = j13
movl 216(%esp),%ecx
# in14 = j14
movl 220(%esp),%edx
# in15 = j15
movl 224(%esp),%ebx
# x12 = in12
movl %eax,148(%esp)
# x13 = in13
movl %ecx,152(%esp)
# x14 = in14
movl %edx,156(%esp)
# x15 = in15
movl %ebx,160(%esp)
# Round counter. The loop body below holds four rounds and subtracts 4
# per pass, so it executes five times.
# i = 20
mov $20,%ebp
# Preload x0, x5, x10 and x15 into the registers that carry them between
# rounds.
# p = x0
movl 100(%esp),%eax
# s = x5
movl 120(%esp),%ecx
# t = x10
movl 140(%esp),%edx
# w = x15
movl 160(%esp),%ebx
._mainloop:
# Round 1 of 4 in this pass. Four add / rotate / xor chains run
# interleaved in p, t, r-or-s and v-or-w. Words written:
#   rotate 7:  x4, x14, x9, x3
#   rotate 9:  x8, x2, x13, x7
#   rotate 13: x12, x6, x1, x11
#   rotate 18: x0, x10, x5, x15 (left in p, t, s, w)
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# Round 2 of 4 in this pass. The x0, x10, x5, x15 values from round 1
# are stored first, then the chains use a different word pairing.
# Words written:
#   rotate 7:  x1, x11, x6, x12
#   rotate 9:  x2, x8, x7, x13
#   rotate 13: x3, x9, x4, x14
#   rotate 18: x0, x10, x5, x15 (left in p, t, s, w)
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# Round 3 of 4 in this pass: the same instruction sequence as round 1.
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# Round 4 of 4 in this pass: the same instruction sequence as round 2.
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# Loop control: ja repeats while the counter is still above zero after
# the subtraction.
# i -= 4
sub $4,%ebp
# goto mainloop if unsigned >
ja ._mainloop
# Rounds finished: flush the four register-held words back to the
# working state.
# x0 = p
movl %eax,100(%esp)
# x5 = s
movl %ecx,120(%esp)
# x10 = t
movl %edx,140(%esp)
# x15 = w
movl %ebx,160(%esp)
# out = out_backup
movl 72(%esp),%edi
# m = m_backup
movl 68(%esp),%esi
# Output stage, two words at a time through eax and ecx:
#   out[k] = (x[k] + j[k]) ^ m[k]   for k = 0 .. 15
# A full 64 bytes are always read from m and written to out; for a short
# final block both pointers refer to tmp.
# in0 = x0
movl 100(%esp),%eax
# in1 = x1
movl 104(%esp),%ecx
# in0 += j0
addl 164(%esp),%eax
# in1 += j1
addl 168(%esp),%ecx
# in0 ^= *(uint32 *) (m + 0)
xorl 0(%esi),%eax
# in1 ^= *(uint32 *) (m + 4)
xorl 4(%esi),%ecx
# *(uint32 *) (out + 0) = in0
movl %eax,0(%edi)
# *(uint32 *) (out + 4) = in1
movl %ecx,4(%edi)
# in2 = x2
movl 108(%esp),%eax
# in3 = x3
movl 112(%esp),%ecx
# in2 += j2
addl 172(%esp),%eax
# in3 += j3
addl 176(%esp),%ecx
# in2 ^= *(uint32 *) (m + 8)
xorl 8(%esi),%eax
# in3 ^= *(uint32 *) (m + 12)
xorl 12(%esi),%ecx
# *(uint32 *) (out + 8) = in2
movl %eax,8(%edi)
# *(uint32 *) (out + 12) = in3
movl %ecx,12(%edi)
# in4 = x4
movl 116(%esp),%eax
# in5 = x5
movl 120(%esp),%ecx
# in4 += j4
addl 180(%esp),%eax
# in5 += j5
addl 184(%esp),%ecx
# in4 ^= *(uint32 *) (m + 16)
xorl 16(%esi),%eax
# in5 ^= *(uint32 *) (m + 20)
xorl 20(%esi),%ecx
# *(uint32 *) (out + 16) = in4
movl %eax,16(%edi)
# *(uint32 *) (out + 20) = in5
movl %ecx,20(%edi)
# in6 = x6
movl 124(%esp),%eax
# in7 = x7
movl 128(%esp),%ecx
# in6 += j6
addl 188(%esp),%eax
# in7 += j7
addl 192(%esp),%ecx
# in6 ^= *(uint32 *) (m + 24)
xorl 24(%esi),%eax
# in7 ^= *(uint32 *) (m + 28)
xorl 28(%esi),%ecx
# *(uint32 *) (out + 24) = in6
movl %eax,24(%edi)
# *(uint32 *) (out + 28) = in7
movl %ecx,28(%edi)
# in8 = x8
movl 132(%esp),%eax
# in9 = x9
movl 136(%esp),%ecx
# in8 += j8
addl 196(%esp),%eax
# in9 += j9
addl 200(%esp),%ecx
# in8 ^= *(uint32 *) (m + 32)
xorl 32(%esi),%eax
# in9 ^= *(uint32 *) (m + 36)
xorl 36(%esi),%ecx
# *(uint32 *) (out + 32) = in8
movl %eax,32(%edi)
# *(uint32 *) (out + 36) = in9
movl %ecx,36(%edi)
# in10 = x10
movl 140(%esp),%eax
# in11 = x11
movl 144(%esp),%ecx
# in10 += j10
addl 204(%esp),%eax
# in11 += j11
addl 208(%esp),%ecx
# in10 ^= *(uint32 *) (m + 40)
xorl 40(%esi),%eax
# in11 ^= *(uint32 *) (m + 44)
xorl 44(%esi),%ecx
# *(uint32 *) (out + 40) = in10
movl %eax,40(%edi)
# *(uint32 *) (out + 44) = in11
movl %ecx,44(%edi)
# in12 = x12
movl 148(%esp),%eax
# in13 = x13
movl 152(%esp),%ecx
# in12 += j12
addl 212(%esp),%eax
# in13 += j13
addl 216(%esp),%ecx
# in12 ^= *(uint32 *) (m + 48)
xorl 48(%esi),%eax
# in13 ^= *(uint32 *) (m + 52)
xorl 52(%esi),%ecx
# *(uint32 *) (out + 48) = in12
movl %eax,48(%edi)
# *(uint32 *) (out + 52) = in13
movl %ecx,52(%edi)
# in14 = x14
movl 156(%esp),%eax
# in15 = x15
movl 160(%esp),%ecx
# in14 += j14
addl 220(%esp),%eax
# in15 += j15
addl 224(%esp),%ecx
# in14 ^= *(uint32 *) (m + 56)
xorl 56(%esi),%eax
# in15 ^= *(uint32 *) (m + 60)
xorl 60(%esi),%ecx
# *(uint32 *) (out + 56) = in14
movl %eax,56(%edi)
# *(uint32 *) (out + 60) = in15
movl %ecx,60(%edi)
# bytes = bytes_backup
movl 76(%esp),%ebx
# Advance the block counter: j8 is the low word and j9 the high word of
# a 64-bit value; the carry out of the add is propagated by adc.
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in8 += 1
add $1,%eax
# in9 += 0 + carry
adc $0,%ecx
# j8 = in8
movl %eax,196(%esp)
# j9 = in9
movl %ecx,200(%esp)
# Three-way dispatch on the remaining count. Both branches test the flags
# from the single cmp: above 64 loops for another block, exactly 64 skips
# the copy-out, below 64 falls through to copy the staged bytes.
# bytes - 64
cmp $64,%ebx
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# goto bytesatleast64 if unsigned>=
jae ._bytesatleast64
# Short final block: out still points at tmp. Copy only the requested
# number of bytes from tmp to the real destination saved in ctarget.
# m = out
mov %edi,%esi
# out = ctarget
movl 228(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
._bytesatleast64:
# Last block done: write the advanced counter (j8, j9) back into the
# state at x + 32 and x + 36. No other state word is written.
# x = x_backup
movl 64(%esp),%eax
# in8 = j8
movl 196(%esp),%ecx
# in9 = j9
movl 200(%esp),%edx
# *(uint32 *) (x + 32) = in8
movl %ecx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %edx,36(%eax)
._done:
# Common exit: restore the saved registers, then release the frame by
# adding back the amount saved in eax_stack.
# eax = eax_stack
movl 80(%esp),%eax
# ebx = ebx_stack
movl 84(%esp),%ebx
# esi = esi_stack
movl 88(%esp),%esi
# edi = edi_stack
movl 92(%esp),%edi
# ebp = ebp_stack
movl 96(%esp),%ebp
# leave
add %eax,%esp
ret
._bytesatleast65:
# More than 64 bytes were left before this block: account for the block
# just produced, step both pointers past it, and go round again.
# bytes -= 64
sub $64,%ebx
# out += 64
add $64,%edi
# m += 64
add $64,%esi
# goto bytesatleast1
jmp ._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)