Update BoringSSL
This commit is contained in: parent d728a4dc8d, commit d2a42aa235

@@ -262,6 +262,8 @@ int BIO_should_io_special(const BIO *bio) {
int BIO_get_retry_reason(const BIO *bio) { return bio->retry_reason; }

void BIO_set_retry_reason(BIO *bio, int reason) { bio->retry_reason = reason; }

void BIO_clear_flags(BIO *bio, int flags) {
  bio->flags &= ~flags;
}
@@ -126,13 +126,7 @@ BIO *BIO_new_fp(FILE *stream, int close_flag) {
  return ret;
}

static int file_new(BIO *bio) { return 1; }

static int file_free(BIO *bio) {
  if (bio == NULL) {
    return 0;
  }

  if (!bio->shutdown) {
    return 1;
  }
@@ -279,7 +273,7 @@ static const BIO_METHOD methods_filep = {
    BIO_TYPE_FILE, "FILE pointer",
    file_write, file_read,
    NULL /* puts */, file_gets,
    file_ctrl, file_new,
    file_ctrl, NULL /* create */,
    file_free, NULL /* callback_ctrl */,
};
@@ -314,4 +308,10 @@ int BIO_rw_filename(BIO *bio, const char *filename) {
                  BIO_CLOSE | BIO_FP_READ | BIO_FP_WRITE, (char *)filename);
}

long BIO_tell(BIO *bio) { return BIO_ctrl(bio, BIO_C_FILE_TELL, 0, NULL); }

long BIO_seek(BIO *bio, long offset) {
  return BIO_ctrl(bio, BIO_C_FILE_SEEK, offset, NULL);
}

#endif  // OPENSSL_TRUSTY
@@ -29,8 +29,10 @@ static const unsigned kMaxDepth = 2048;

// is_string_type returns one if |tag| is a string type and zero otherwise. It
// ignores the constructed bit.
static int is_string_type(unsigned tag) {
  // While BER supports constructed BIT STRINGS, OpenSSL misparses them. To
  // avoid acting on an ambiguous input, we do not support constructed BIT
  // STRINGS. See https://github.com/openssl/openssl/issues/12810.
  switch (tag & ~CBS_ASN1_CONSTRUCTED) {
    case CBS_ASN1_BITSTRING:
    case CBS_ASN1_OCTETSTRING:
    case CBS_ASN1_UTF8STRING:
    case CBS_ASN1_NUMERICSTRING:
@@ -53,7 +55,7 @@ static int is_string_type(unsigned tag) {
// depending on whether an indefinite length element or constructed string was
// found. The value of |orig_in| is not changed. It returns one on success (i.e.
// |*ber_found| was set) and zero on error.
static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
static int cbs_find_ber(const CBS *orig_in, int *ber_found, unsigned depth) {
  CBS in;

  if (depth > kMaxDepth) {
@@ -68,14 +70,11 @@ static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
    unsigned tag;
    size_t header_len;

    if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len)) {
    if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len,
                                      ber_found)) {
      return 0;
    }
    if (CBS_len(&contents) == header_len &&
        header_len > 0 &&
        CBS_data(&contents)[header_len-1] == 0x80) {
      // Found an indefinite-length element.
      *ber_found = 1;
    if (*ber_found) {
      return 1;
    }
    if (tag & CBS_ASN1_CONSTRUCTED) {
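The indefinite-length detection above is what ultimately drives |CBS_asn1_ber_to_der|. A minimal caller-side sketch, assuming BoringSSL's public openssl/bytestring.h API; the byte string is an illustrative BER SEQUENCE with an indefinite length, terminated by the 00 00 end-of-contents octets:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#include <openssl/bytestring.h>
#include <openssl/mem.h>

int main(void) {
  // 0x30 0x80: SEQUENCE with indefinite length (BER only), containing
  // INTEGER 1, terminated by the 0x00 0x00 end-of-contents octets.
  static const uint8_t kBer[] = {0x30, 0x80, 0x02, 0x01, 0x01, 0x00, 0x00};

  CBS in, out;
  uint8_t *storage = NULL;
  CBS_init(&in, kBer, sizeof(kBer));
  if (!CBS_asn1_ber_to_der(&in, &out, &storage)) {
    return 1;
  }
  // |storage| is non-NULL iff a conversion was needed; here the DER
  // equivalent is 0x30 0x03 0x02 0x01 0x01.
  assert(storage != NULL);
  printf("DER length: %zu\n", CBS_len(&out));
  OPENSSL_free(storage);
  return 0;
}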
@@ -120,9 +119,11 @@ static int cbs_convert_ber(CBS *in, CBB *out, unsigned string_tag,
  CBS contents;
  unsigned tag, child_string_tag = string_tag;
  size_t header_len;
  int ber_found;
  CBB *out_contents, out_contents_storage;

  if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len)) {
  if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len,
                                    &ber_found)) {
    return 0;
  }
@@ -194,7 +195,7 @@ int CBS_asn1_ber_to_der(CBS *in, CBS *out, uint8_t **out_storage) {

  // First, do a quick walk to find any indefinite-length elements. Most of the
  // time we hope that there aren't any and thus we can quickly return.
  char conversion_needed;
  int conversion_needed;
  if (!cbs_find_ber(in, &conversion_needed, 0)) {
    return 0;
  }
@@ -404,6 +404,15 @@ int CBB_add_bytes(CBB *cbb, const uint8_t *data, size_t len) {
  return 1;
}

int CBB_add_zeros(CBB *cbb, size_t len) {
  uint8_t *out;
  if (!CBB_add_space(cbb, &out, len)) {
    return 0;
  }
  OPENSSL_memset(out, 0, len);
  return 1;
}

int CBB_add_space(CBB *cbb, uint8_t **out_data, size_t len) {
  if (!CBB_flush(cbb) ||
      !cbb_buffer_add(cbb->base, out_data, len)) {
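|CBB_add_zeros| is a small convenience over |CBB_add_space|. A usage sketch, assuming the public openssl/bytestring.h API; the helper name is illustrative, and padding a record out to a fixed size is the typical use:

#include <openssl/bytestring.h>

// Appends |data| and then zero-pads the result to |total| bytes. Returns one
// on success, zero on allocation failure or if |data| is already too long.
static int pad_to_length(CBB *cbb, const uint8_t *data, size_t len,
                         size_t total) {
  if (len > total) {
    return 0;
  }
  return CBB_add_bytes(cbb, data, len) &&
         CBB_add_zeros(cbb, total - len);
}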
@@ -216,6 +216,14 @@ int CBS_get_u24_length_prefixed(CBS *cbs, CBS *out) {
  return cbs_get_length_prefixed(cbs, out, 3);
}

int CBS_get_until_first(CBS *cbs, CBS *out, uint8_t c) {
  const uint8_t *split = OPENSSL_memchr(CBS_data(cbs), c, CBS_len(cbs));
  if (split == NULL) {
    return 0;
  }
  return CBS_get_bytes(cbs, out, split - CBS_data(cbs));
}

// parse_base128_integer reads a big-endian base-128 integer from |cbs| and sets
// |*out| to the result. This is the encoding used in DER for both high tag
// number form and OID components.
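Base-128 works the same way in both contexts the comment names: each byte carries seven value bits and the high bit marks a continuation byte. A hedged sketch of the decoder the comment describes (names are illustrative; the real parser in this file additionally rejects non-minimal encodings and overflow):

#include <stddef.h>
#include <stdint.h>

// Decodes a big-endian base-128 integer from |in|. Returns the number of
// bytes consumed, or 0 on truncation. Overflow checks elided for brevity.
static size_t base128_decode(const uint8_t *in, size_t len, uint64_t *out) {
  uint64_t v = 0;
  for (size_t i = 0; i < len; i++) {
    v = (v << 7) | (in[i] & 0x7f);
    if ((in[i] & 0x80) == 0) {
      *out = v;  // e.g. {0x82, 0x2c} decodes to 300
      return i + 1;
    }
  }
  return 0;  // Ran out of input before the final (high-bit-clear) byte.
}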
@@ -254,8 +262,7 @@ static int parse_asn1_tag(CBS *cbs, unsigned *out) {
  //
  // If the number portion is 31 (0x1f, the largest value that fits in the
  // allotted bits), then the tag is more than one byte long and the
  // continuation bytes contain the tag number. This parser only supports tag
  // numbers less than 31 (and thus single-byte tags).
  // continuation bytes contain the tag number.
  unsigned tag = ((unsigned)tag_byte & 0xe0) << CBS_ASN1_TAG_SHIFT;
  unsigned tag_number = tag_byte & 0x1f;
  if (tag_number == 0x1f) {
@@ -263,7 +270,7 @@ static int parse_asn1_tag(CBS *cbs, unsigned *out) {
    if (!parse_base128_integer(cbs, &v) ||
        // Check the tag number is within our supported bounds.
        v > CBS_ASN1_TAG_NUMBER_MASK ||
        // Small tag numbers should have used low tag number form.
        // Small tag numbers should have used low tag number form, even in BER.
        v < 0x1f) {
      return 0;
    }
@@ -277,13 +284,17 @@ static int parse_asn1_tag(CBS *cbs, unsigned *out) {
}

static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                                    size_t *out_header_len, int ber_ok) {
                                    size_t *out_header_len, int *out_ber_found,
                                    int ber_ok) {
  CBS header = *cbs;
  CBS throwaway;

  if (out == NULL) {
    out = &throwaway;
  }
  if (ber_ok) {
    *out_ber_found = 0;
  }

  unsigned tag;
  if (!parse_asn1_tag(&header, &tag)) {
@@ -321,27 +332,38 @@ static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
    if (out_header_len != NULL) {
      *out_header_len = header_len;
    }
    *out_ber_found = 1;
    return CBS_get_bytes(cbs, out, header_len);
  }

  // ITU-T X.690 clause 8.1.3.5.c specifies that the value 0xff shall not be
  // used as the first byte of the length. If this parser encounters that
  // value, num_bytes will be parsed as 127, which will fail the check below.
  // value, num_bytes will be parsed as 127, which will fail this check.
  if (num_bytes == 0 || num_bytes > 4) {
    return 0;
  }
  if (!cbs_get_u(&header, &len64, num_bytes)) {
    return 0;
  }
  // ITU-T X.690 section 10.1 (DER length forms) requires encoding the length
  // with the minimum number of octets.
  // ITU-T X.690 section 10.1 (DER length forms) requires encoding the
  // length with the minimum number of octets. BER could, technically, have
  // 125 superfluous zero bytes. We do not attempt to handle that and still
  // require that the length fit in a |uint32_t| for BER.
  if (len64 < 128) {
    // Length should have used short-form encoding.
    return 0;
    if (ber_ok) {
      *out_ber_found = 1;
    } else {
      return 0;
    }
  }
  if ((len64 >> ((num_bytes-1)*8)) == 0) {
  if ((len64 >> ((num_bytes - 1) * 8)) == 0) {
    // Length should have been at least one byte shorter.
    return 0;
    if (ber_ok) {
      *out_ber_found = 1;
    } else {
      return 0;
    }
  }
  len = len64;
  if (len + header_len + num_bytes < len) {
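For reference, the length forms these checks enforce, as a worked example (the framing follows X.690; the byte strings are illustrative, not from the diff):

// DER length encodings for a few values:
//   len = 5    -> 0x05              (short form, len < 128)
//   len = 128  -> 0x81 0x80         (long form, one length octet)
//   len = 300  -> 0x82 0x01 0x2c    (long form, two length octets)
// Rejected above (in DER mode):
//   0x81 0x05          -- long form where short form was required
//   0x82 0x00 0x80     -- leading zero length octet (non-minimal)
//   0xff ...           -- first length byte 0xff (X.690 clause 8.1.3.5.c)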
@@ -374,13 +396,15 @@ int CBS_get_any_asn1(CBS *cbs, CBS *out, unsigned *out_tag) {
int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                             size_t *out_header_len) {
  return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
                                  0 /* DER only */);
                                  NULL, 0 /* DER only */);
}

int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                                 size_t *out_header_len) {
  return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
                                  1 /* BER allowed */);
                                 size_t *out_header_len, int *out_ber_found) {
  int ber_found_temp;
  return cbs_get_any_asn1_element(
      cbs, out, out_tag, out_header_len,
      out_ber_found ? out_ber_found : &ber_found_temp, 1 /* BER allowed */);
}

static int cbs_get_asn1(CBS *cbs, CBS *out, unsigned tag_value,
|
|||
|
||||
int CBS_get_asn1_uint64(CBS *cbs, uint64_t *out) {
|
||||
CBS bytes;
|
||||
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
|
||||
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER) ||
|
||||
!CBS_is_unsigned_asn1_integer(&bytes)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
*out = 0;
|
||||
const uint8_t *data = CBS_data(&bytes);
|
||||
size_t len = CBS_len(&bytes);
|
||||
|
||||
if (len == 0) {
|
||||
// An INTEGER is encoded with at least one octet.
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((data[0] & 0x80) != 0) {
|
||||
// Negative number.
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (data[0] == 0 && len > 1 && (data[1] & 0x80) == 0) {
|
||||
// Extra leading zeros.
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
if ((*out >> 56) != 0) {
|
||||
// Too large to represent as a uint64_t.
|
||||
|
@@ -462,31 +471,21 @@ int CBS_get_asn1_uint64(CBS *cbs, uint64_t *out) {
}

int CBS_get_asn1_int64(CBS *cbs, int64_t *out) {
  int is_negative;
  CBS bytes;
  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER) ||
      !CBS_is_valid_asn1_integer(&bytes, &is_negative)) {
    return 0;
  }
  const uint8_t *data = CBS_data(&bytes);
  const size_t len = CBS_len(&bytes);

  if (len == 0 || len > sizeof(int64_t)) {
    // An INTEGER is encoded with at least one octet.
  if (len > sizeof(int64_t)) {
    return 0;
  }
  if (len > 1) {
    if (data[0] == 0 && (data[1] & 0x80) == 0) {
      return 0;  // Extra leading zeros.
    }
    if (data[0] == 0xff && (data[1] & 0x80) != 0) {
      return 0;  // Extra leading 0xff.
    }
  }

  union {
    int64_t i;
    uint8_t bytes[sizeof(int64_t)];
  } u;
  const int is_negative = (data[0] & 0x80);
  memset(u.bytes, is_negative ? 0xff : 0, sizeof(u.bytes));  // Sign-extend.
  for (size_t i = 0; i < len; i++) {
    u.bytes[i] = data[len - i - 1];
@@ -635,6 +634,30 @@ int CBS_asn1_bitstring_has_bit(const CBS *cbs, unsigned bit) {
         (CBS_data(cbs)[byte_num] & (1 << bit_num)) != 0;
}

int CBS_is_valid_asn1_integer(const CBS *cbs, int *out_is_negative) {
  CBS copy = *cbs;
  uint8_t first_byte, second_byte;
  if (!CBS_get_u8(&copy, &first_byte)) {
    return 0;  // INTEGERs may not be empty.
  }
  if (out_is_negative != NULL) {
    *out_is_negative = (first_byte & 0x80) != 0;
  }
  if (!CBS_get_u8(&copy, &second_byte)) {
    return 1;  // One byte INTEGERs are always minimal.
  }
  if ((first_byte == 0x00 && (second_byte & 0x80) == 0) ||
      (first_byte == 0xff && (second_byte & 0x80) != 0)) {
    return 0;  // The value is minimal iff the first 9 bits are not all equal.
  }
  return 1;
}

int CBS_is_unsigned_asn1_integer(const CBS *cbs) {
  int is_negative;
  return CBS_is_valid_asn1_integer(cbs, &is_negative) && !is_negative;
}

static int add_decimal(CBB *out, uint64_t v) {
  char buf[DECIMAL_SIZE(uint64_t) + 1];
  BIO_snprintf(buf, sizeof(buf), "%" PRIu64, v);
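The "first 9 bits" rule above compactly encodes DER's minimal-INTEGER requirement. A few concrete content encodings for reference (worked examples, not from the diff):

// DER INTEGER contents and how the checks above classify them:
//   {0x7f}        ->  127: valid, non-negative
//   {0x00, 0xff}  ->  255: valid (leading zero needed; 0xff alone is -1)
//   {0xff}        ->   -1: valid, negative
//   {0x00, 0x7f}  -> invalid: first 9 bits all zero (redundant 0x00)
//   {0xff, 0x80}  -> invalid: first 9 bits all one (redundant 0xff)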
@@ -0,0 +1,73 @@
/* Copyright (c) 2021, Google Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

#include <CBigNumBoringSSL_cpu.h>

#if defined(OPENSSL_AARCH64) && defined(OPENSSL_APPLE) && \
    !defined(OPENSSL_STATIC_ARMCAP)

#include <sys/sysctl.h>
#include <sys/types.h>

#include <CBigNumBoringSSL_arm_arch.h>

#include "internal.h"


extern uint32_t OPENSSL_armcap_P;

static int has_hw_feature(const char *name) {
  int value;
  size_t len = sizeof(value);
  if (sysctlbyname(name, &value, &len, NULL, 0) != 0) {
    return 0;
  }
  if (len != sizeof(int)) {
    // This should not happen. All the values queried should be integer-valued.
    assert(0);
    return 0;
  }

  // Per sys/sysctl.h:
  //
  //     Selectors that return errors are not support on the system. Supported
  //     features will return 1 if they are recommended or 0 if they are supported
  //     but are not expected to help performance. Future versions of these
  //     selectors may return larger values as necessary so it is best to test for
  //     non zero.
  return value != 0;
}

void OPENSSL_cpuid_setup(void) {
  // Apple ARM64 platforms have NEON and cryptography extensions available
  // statically, so we do not need to query them. In particular, there sometimes
  // are no sysctls corresponding to such features. See below.
#if !defined(__ARM_NEON) || !defined(__ARM_FEATURE_CRYPTO)
#error "NEON and crypto extensions should be statically available."
#endif
  OPENSSL_armcap_P =
      ARMV7_NEON | ARMV8_AES | ARMV8_PMULL | ARMV8_SHA1 | ARMV8_SHA256;

  // macOS has sysctls named both like "hw.optional.arm.FEAT_SHA512" and like
  // "hw.optional.armv8_2_sha512". There does not appear to be documentation on
  // which to use. The "armv8_2_sha512" style omits statically-available
  // features, while the "FEAT_SHA512" style includes them. However, the
  // "FEAT_SHA512" style was added in macOS 12, so we use the older style for
  // better compatibility and handle static features above.
  if (has_hw_feature("hw.optional.armv8_2_sha512")) {
    OPENSSL_armcap_P |= ARMV8_SHA512;
  }
}

#endif  // OPENSSL_AARCH64 && OPENSSL_APPLE && !OPENSSL_STATIC_ARMCAP
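A standalone way to poke at the same selector, assuming a macOS/arm64 host (a hypothetical test program, not part of the diff):

#include <stdio.h>
#include <sys/sysctl.h>

int main(void) {
  int value = 0;
  size_t len = sizeof(value);
  if (sysctlbyname("hw.optional.armv8_2_sha512", &value, &len, NULL, 0) != 0) {
    printf("selector not supported\n");  // Treated as "feature absent" above.
    return 1;
  }
  printf("hw.optional.armv8_2_sha512 = %d\n", value);
  return 0;
}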
@@ -50,6 +50,9 @@ void OPENSSL_cpuid_setup(void) {
  if (hwcap & ZX_ARM64_FEATURE_ISA_SHA2) {
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
  // As of writing, Fuchsia does not have a flag for ARMv8.2 SHA-512
  // extensions. When it does, add it here. See
  // https://bugs.fuchsia.dev/p/fuchsia/issues/detail?id=90759.
}

#endif  // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP
#endif  // OPENSSL_AARCH64 && OPENSSL_FUCHSIA && !OPENSSL_STATIC_ARMCAP
@@ -36,6 +36,7 @@ void OPENSSL_cpuid_setup(void) {
  static const unsigned long kPMULL = 1 << 4;
  static const unsigned long kSHA1 = 1 << 5;
  static const unsigned long kSHA256 = 1 << 6;
  static const unsigned long kSHA512 = 1 << 21;

  if ((hwcap & kNEON) == 0) {
    // Matching OpenSSL, if NEON is missing, don't report other features

@@ -57,6 +58,9 @@ void OPENSSL_cpuid_setup(void) {
  if (hwcap & kSHA256) {
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
  if (hwcap & kSHA512) {
    OPENSSL_armcap_P |= ARMV8_SHA512;
  }
}

#endif  // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP
#endif  // OPENSSL_AARCH64 && OPENSSL_LINUX && !OPENSSL_STATIC_ARMCAP
@@ -0,0 +1,43 @@
/* Copyright (c) 2018, Google Inc.
 * Copyright (c) 2020, Arm Ltd.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

#include <CBigNumBoringSSL_cpu.h>

#if defined(OPENSSL_AARCH64) && defined(OPENSSL_WINDOWS) && \
    !defined(OPENSSL_STATIC_ARMCAP)

#include <windows.h>

#include <CBigNumBoringSSL_arm_arch.h>

#include "internal.h"

extern uint32_t OPENSSL_armcap_P;
void OPENSSL_cpuid_setup(void) {
  // We do not need to check for the presence of NEON, as Armv8-A always has it
  OPENSSL_armcap_P |= ARMV7_NEON;

  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
    // These are all covered by one call in Windows
    OPENSSL_armcap_P |= ARMV8_AES;
    OPENSSL_armcap_P |= ARMV8_PMULL;
    OPENSSL_armcap_P |= ARMV8_SHA1;
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
  // As of writing, Windows does not have a |PF_*| value for ARMv8.2 SHA-512
  // extensions. When it does, add it here.
}

#endif  // OPENSSL_AARCH64 && OPENSSL_WINDOWS && !OPENSSL_STATIC_ARMCAP
@@ -22,15 +22,15 @@

extern uint32_t OPENSSL_armcap_P;

char CRYPTO_is_NEON_capable_at_runtime(void) {
int CRYPTO_is_NEON_capable_at_runtime(void) {
  return (OPENSSL_armcap_P & ARMV7_NEON) != 0;
}

int CRYPTO_is_ARMv8_AES_capable(void) {
int CRYPTO_is_ARMv8_AES_capable_at_runtime(void) {
  return (OPENSSL_armcap_P & ARMV8_AES) != 0;
}

int CRYPTO_is_ARMv8_PMULL_capable(void) {
int CRYPTO_is_ARMv8_PMULL_capable_at_runtime(void) {
  return (OPENSSL_armcap_P & ARMV8_PMULL) != 0;
}
@@ -23,7 +23,7 @@
#include <CBigNumBoringSSL_arm_arch.h>
#include <CBigNumBoringSSL_mem.h>

#include "cpu-arm-linux.h"
#include "cpu_arm_linux.h"

#define AT_HWCAP 16
#define AT_HWCAP2 26
@@ -146,11 +146,13 @@ extern uint32_t OPENSSL_armcap_P;
static int g_has_broken_neon, g_needs_hwcap2_workaround;

void OPENSSL_cpuid_setup(void) {
  char *cpuinfo_data;
  size_t cpuinfo_len;
  if (!read_file(&cpuinfo_data, &cpuinfo_len, "/proc/cpuinfo")) {
    return;
  }
  // We ignore the return value of |read_file| and proceed with an empty
  // /proc/cpuinfo on error. If |getauxval| works, we will still detect
  // capabilities. There may be a false positive due to
  // |crypto_cpuinfo_has_broken_neon|, but this is now rare.
  char *cpuinfo_data = NULL;
  size_t cpuinfo_len = 0;
  read_file(&cpuinfo_data, &cpuinfo_len, "/proc/cpuinfo");
  STRING_PIECE cpuinfo;
  cpuinfo.data = cpuinfo_data;
  cpuinfo.len = cpuinfo_len;
@@ -173,7 +175,13 @@ void OPENSSL_cpuid_setup(void) {
    hwcap = crypto_get_arm_hwcap_from_cpuinfo(&cpuinfo);
  }

  // Clear NEON support if known broken.
  // Clear NEON support if known broken. Note, if NEON is available statically,
  // the non-NEON code is dropped and this workaround is a no-op.
  //
  // TODO(davidben): The Android NDK now builds with NEON statically available
  // by default. Cronet still has some consumers that support NEON-less devices
  // (b/150371744). Get metrics on whether they still see this CPU and, if not,
  // remove this check entirely.
  g_has_broken_neon = crypto_cpuinfo_has_broken_neon(&cpuinfo);
  if (g_has_broken_neon) {
    hwcap &= ~HWCAP_NEON;
@@ -184,7 +192,10 @@ void OPENSSL_cpuid_setup(void) {
    OPENSSL_armcap_P |= ARMV7_NEON;

    // Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
    // /proc/cpuinfo. See https://crbug.com/596156.
    // /proc/cpuinfo. See https://crbug.com/boringssl/46. As of February 2021,
    // this is now rare (see Chrome's Net.NeedsHWCAP2Workaround metric), but AES
    // and PMULL extensions are very useful, so we still carry the workaround
    // for now.
    unsigned long hwcap2 = 0;
    if (getauxval != NULL) {
      hwcap2 = getauxval(AT_HWCAP2);
@@ -16,6 +16,8 @@

#include <CBigNumBoringSSL_cpu.h>

#include "fipsmodule/rand/fork_detect.h"
#include "fipsmodule/rand/internal.h"
#include "internal.h"

@@ -102,6 +104,9 @@ HIDDEN uint32_t OPENSSL_armcap_P =
#endif
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
    ARMV8_PMULL |
#endif
#if defined(__ARM_FEATURE_SHA512)
    ARMV8_SHA512 |
#endif
    0;
@@ -174,6 +179,15 @@ int CRYPTO_has_asm(void) {
#endif
}

void CRYPTO_pre_sandbox_init(void) {
  // Read from /proc/cpuinfo if needed.
  CRYPTO_library_init();
  // Open /dev/urandom if needed.
  CRYPTO_init_sysrand();
  // Set up MADV_WIPEONFORK state if needed.
  CRYPTO_get_fork_generation();
}

const char *SSLeay_version(int which) { return OpenSSL_version(which); }

const char *OpenSSL_version(int which) {
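|CRYPTO_pre_sandbox_init| exists so a process can warm up every lazily-opened resource before dropping filesystem access. A hedged usage sketch; the sandbox entry function is a placeholder for whatever mechanism the application uses:

#include <openssl/crypto.h>

extern void enter_seccomp_sandbox(void);  // Hypothetical; app-specific.

void start_worker(void) {
  // Touch /proc/cpuinfo, /dev/urandom, and MADV_WIPEONFORK state now,
  // while the filesystem is still reachable.
  CRYPTO_pre_sandbox_init();
  enter_seccomp_sandbox();
  // BoringSSL calls made after this point no longer need those resources.
}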
@@ -368,84 +368,6 @@ void ERR_clear_system_error(void) {
  errno = 0;
}

char *ERR_error_string(uint32_t packed_error, char *ret) {
  static char buf[ERR_ERROR_STRING_BUF_LEN];

  if (ret == NULL) {
    // TODO(fork): remove this.
    ret = buf;
  }

#if !defined(NDEBUG)
  // This is aimed to help catch callers who don't provide
  // |ERR_ERROR_STRING_BUF_LEN| bytes of space.
  OPENSSL_memset(ret, 0, ERR_ERROR_STRING_BUF_LEN);
#endif

  return ERR_error_string_n(packed_error, ret, ERR_ERROR_STRING_BUF_LEN);
}

char *ERR_error_string_n(uint32_t packed_error, char *buf, size_t len) {
  char lib_buf[64], reason_buf[64];
  const char *lib_str, *reason_str;
  unsigned lib, reason;

  if (len == 0) {
    return NULL;
  }

  lib = ERR_GET_LIB(packed_error);
  reason = ERR_GET_REASON(packed_error);

  lib_str = ERR_lib_error_string(packed_error);
  reason_str = ERR_reason_error_string(packed_error);

  if (lib_str == NULL) {
    BIO_snprintf(lib_buf, sizeof(lib_buf), "lib(%u)", lib);
    lib_str = lib_buf;
  }

  if (reason_str == NULL) {
    BIO_snprintf(reason_buf, sizeof(reason_buf), "reason(%u)", reason);
    reason_str = reason_buf;
  }

  BIO_snprintf(buf, len, "error:%08" PRIx32 ":%s:OPENSSL_internal:%s",
               packed_error, lib_str, reason_str);

  if (strlen(buf) == len - 1) {
    // output may be truncated; make sure we always have 5 colon-separated
    // fields, i.e. 4 colons.
    static const unsigned num_colons = 4;
    unsigned i;
    char *s = buf;

    if (len <= num_colons) {
      // In this situation it's not possible to ensure that the correct number
      // of colons are included in the output.
      return buf;
    }

    for (i = 0; i < num_colons; i++) {
      char *colon = strchr(s, ':');
      char *last_pos = &buf[len - 1] - num_colons + i;

      if (colon == NULL || colon > last_pos) {
        // Set colon |i| at the last possible position (buf[len-1] is the
        // terminating 0). If we're setting this colon, then the whole of the
        // rest of the string must be colons in order to have the correct
        // number.
        OPENSSL_memset(last_pos, ':', num_colons - i);
        break;
      }

      s = colon + 1;
    }
  }

  return buf;
}

// err_string_cmp is a compare function for searching error values with
// |bsearch| in |err_string_lookup|.
static int err_string_cmp(const void *a, const void *b) {
@@ -530,7 +452,7 @@ static const char *const kLibraryNames[ERR_NUM_LIBS] = {
    "User defined functions",  // ERR_LIB_USER
};

const char *ERR_lib_error_string(uint32_t packed_error) {
static const char *err_lib_error_string(uint32_t packed_error) {
  const uint32_t lib = ERR_GET_LIB(packed_error);

  if (lib >= ERR_NUM_LIBS) {
@@ -539,11 +461,16 @@ const char *ERR_lib_error_string(uint32_t packed_error) {
  return kLibraryNames[lib];
}

const char *ERR_lib_error_string(uint32_t packed_error) {
  const char *ret = err_lib_error_string(packed_error);
  return ret == NULL ? "unknown library" : ret;
}

const char *ERR_func_error_string(uint32_t packed_error) {
  return "OPENSSL_internal";
}

const char *ERR_reason_error_string(uint32_t packed_error) {
static const char *err_reason_error_string(uint32_t packed_error) {
  const uint32_t lib = ERR_GET_LIB(packed_error);
  const uint32_t reason = ERR_GET_REASON(packed_error);
@@ -579,6 +506,86 @@ const char *ERR_reason_error_string(uint32_t packed_error) {
                           kOpenSSLReasonValuesLen, kOpenSSLReasonStringData);
}

const char *ERR_reason_error_string(uint32_t packed_error) {
  const char *ret = err_reason_error_string(packed_error);
  return ret == NULL ? "unknown error" : ret;
}

char *ERR_error_string(uint32_t packed_error, char *ret) {
  static char buf[ERR_ERROR_STRING_BUF_LEN];

  if (ret == NULL) {
    // TODO(fork): remove this.
    ret = buf;
  }

#if !defined(NDEBUG)
  // This is aimed to help catch callers who don't provide
  // |ERR_ERROR_STRING_BUF_LEN| bytes of space.
  OPENSSL_memset(ret, 0, ERR_ERROR_STRING_BUF_LEN);
#endif

  return ERR_error_string_n(packed_error, ret, ERR_ERROR_STRING_BUF_LEN);
}

char *ERR_error_string_n(uint32_t packed_error, char *buf, size_t len) {
  if (len == 0) {
    return NULL;
  }

  unsigned lib = ERR_GET_LIB(packed_error);
  unsigned reason = ERR_GET_REASON(packed_error);

  const char *lib_str = err_lib_error_string(packed_error);
  const char *reason_str = err_reason_error_string(packed_error);

  char lib_buf[64], reason_buf[64];
  if (lib_str == NULL) {
    BIO_snprintf(lib_buf, sizeof(lib_buf), "lib(%u)", lib);
    lib_str = lib_buf;
  }

  if (reason_str == NULL) {
    BIO_snprintf(reason_buf, sizeof(reason_buf), "reason(%u)", reason);
    reason_str = reason_buf;
  }

  BIO_snprintf(buf, len, "error:%08" PRIx32 ":%s:OPENSSL_internal:%s",
               packed_error, lib_str, reason_str);

  if (strlen(buf) == len - 1) {
    // output may be truncated; make sure we always have 5 colon-separated
    // fields, i.e. 4 colons.
    static const unsigned num_colons = 4;
    unsigned i;
    char *s = buf;

    if (len <= num_colons) {
      // In this situation it's not possible to ensure that the correct number
      // of colons are included in the output.
      return buf;
    }

    for (i = 0; i < num_colons; i++) {
      char *colon = strchr(s, ':');
      char *last_pos = &buf[len - 1] - num_colons + i;

      if (colon == NULL || colon > last_pos) {
        // Set colon |i| at the last possible position (buf[len-1] is the
        // terminating 0). If we're setting this colon, then the whole of the
        // rest of the string must be colons in order to have the correct
        // number.
        OPENSSL_memset(last_pos, ':', num_colons - i);
        break;
      }

      s = colon + 1;
    }
  }

  return buf;
}

void ERR_print_errors_cb(ERR_print_errors_callback_t callback, void *ctx) {
  char buf[ERR_ERROR_STRING_BUF_LEN];
  char buf2[1024];
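The resulting strings have the shape error:<hex>:<library>:OPENSSL_internal:<reason>. A hedged usage sketch, assuming the public openssl/err.h API (the sample error value and output are illustrative):

#include <stdio.h>
#include <openssl/err.h>

void print_last_error(void) {
  char buf[ERR_ERROR_STRING_BUF_LEN];
  uint32_t err = ERR_get_error();
  ERR_error_string_n(err, buf, sizeof(buf));
  // e.g. "error:0900006e:PEM routines:OPENSSL_internal:NO_START_LINE"
  fprintf(stderr, "%s\n", buf);
}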
@@ -738,6 +745,22 @@ void ERR_add_error_dataf(const char *format, ...) {
  err_set_error_data(buf);
}

void ERR_set_error_data(char *data, int flags) {
  if (!(flags & ERR_FLAG_STRING)) {
    // We do not support non-string error data.
    assert(0);
    return;
  }
  if (flags & ERR_FLAG_MALLOCED) {
    err_set_error_data(data);
  } else {
    char *copy = OPENSSL_strdup(data);
    if (copy != NULL) {
      err_set_error_data(copy);
    }
  }
}

int ERR_set_mark(void) {
  ERR_STATE *const state = err_get_state();
(File diff suppressed because it is too large.)
@@ -57,7 +57,23 @@
void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                        const AES_KEY *key, uint8_t ivec[AES_BLOCK_SIZE],
                        uint8_t ecount_buf[AES_BLOCK_SIZE], unsigned int *num) {
  CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num, AES_encrypt);
  if (hwaes_capable()) {
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                aes_hw_ctr32_encrypt_blocks);
  } else if (vpaes_capable()) {
#if defined(VPAES_CTR32)
    // TODO(davidben): On ARM, where |BSAES| is additionally defined, this could
    // use |vpaes_ctr32_encrypt_blocks_with_bsaes|.
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                vpaes_ctr32_encrypt_blocks);
#else
    CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num,
                          vpaes_encrypt);
#endif
  } else {
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                aes_nohw_ctr32_encrypt_blocks);
  }
}

void AES_ecb_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key,
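The dispatch above is invisible to callers; the public entry point keeps its OpenSSL-compatible shape. A minimal usage sketch, assuming openssl/aes.h (key and buffers are placeholders):

#include <string.h>
#include <openssl/aes.h>

void ctr_encrypt_demo(const uint8_t key_bytes[16],
                      const uint8_t iv[AES_BLOCK_SIZE],
                      const uint8_t *plaintext, uint8_t *ciphertext,
                      size_t len) {
  AES_KEY key;
  AES_set_encrypt_key(key_bytes, 128, &key);

  uint8_t ivec[AES_BLOCK_SIZE];
  uint8_t ecount_buf[AES_BLOCK_SIZE] = {0};
  unsigned int num = 0;
  memcpy(ivec, iv, AES_BLOCK_SIZE);

  // Internally selects the hardware, vector-permutation, or fallback
  // implementation, per the dispatch above.
  AES_ctr128_encrypt(plaintext, ciphertext, len, &key, ivec, ecount_buf, &num);
}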
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@@ -250,6 +250,7 @@ Ldec_key_abort:
#endif
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -282,6 +283,7 @@ Loop_enc:
#endif
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -630,20 +632,34 @@ _aes_hw_ctr32_encrypt_blocks:
add r7,r3,#32
mov r6,r5
movlo r12,#0

@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
@ respectively, where the second instruction of an aese/aesmc
@ instruction pair may execute twice if an interrupt is taken right
@ after the first instruction consumes an input register of which a
@ single 32-bit lane has been updated the last time it was modified.
@
@ This function uses a counter in one 32-bit lane. The vmov.32 lines
@ could write to q1 and q10 directly, but that trips this bug.
@ We write to q6 and copy to the final register as a workaround.
@
@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
vmov.32 d13[1],r10
add r8, r8, #2
vorr q1,q6,q6
bls Lctr32_tail
rev r12, r8
vmov.32 d13[1],r12
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
vorr q10,q6,q6
b Loop3x_ctr32

.align 4
@@ -670,11 +686,11 @@ Loop3x_ctr32:
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
add r9,r8,#1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
rev r9,r9
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
@@ -683,8 +699,6 @@ Loop3x_ctr32:
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
@@ -699,21 +713,26 @@ Loop3x_ctr32:
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
@ Note the logic to update q0, q1, and q10 is written to work
@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
@ 32-bit mode. See the comment above.
veor q11,q11,q7
rev r9,r9
vmov.32 d13[1], r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
vorr q0,q6,q6
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
vmov.32 d13[1], r10
rev r12,r8
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
vorr q1,q6,q6
vmov.32 d13[1], r12
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
vorr q10,q6,q6
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
@@ -245,6 +245,7 @@ aes_hw_set_decrypt_key:
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -275,6 +276,7 @@ aes_hw_encrypt:
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -619,20 +621,34 @@ aes_hw_ctr32_encrypt_blocks:
add r7,r3,#32
mov r6,r5
movlo r12,#0

@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
@ respectively, where the second instruction of an aese/aesmc
@ instruction pair may execute twice if an interrupt is taken right
@ after the first instruction consumes an input register of which a
@ single 32-bit lane has been updated the last time it was modified.
@
@ This function uses a counter in one 32-bit lane. The vmov.32 lines
@ could write to q1 and q10 directly, but that trips this bug.
@ We write to q6 and copy to the final register as a workaround.
@
@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
vmov.32 d13[1],r10
add r8, r8, #2
vorr q1,q6,q6
bls .Lctr32_tail
rev r12, r8
vmov.32 d13[1],r12
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
vorr q10,q6,q6
b .Loop3x_ctr32

.align 4
@@ -659,11 +675,11 @@ aes_hw_ctr32_encrypt_blocks:
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
add r9,r8,#1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
rev r9,r9
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
@@ -672,8 +688,6 @@ aes_hw_ctr32_encrypt_blocks:
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
@@ -688,21 +702,26 @@ aes_hw_ctr32_encrypt_blocks:
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
@ Note the logic to update q0, q1, and q10 is written to work
@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
@ 32-bit mode. See the comment above.
veor q11,q11,q7
rev r9,r9
vmov.32 d13[1], r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
vorr q0,q6,q6
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
vmov.32 d13[1], r10
rev r12,r8
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
vorr q1,q6,q6
vmov.32 d13[1], r12
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
vorr q10,q6,q6
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
@@ -34,6 +34,8 @@ Lrcon:
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@@ -202,6 +204,7 @@ Lenc_key_abort:

.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
@@ -235,6 +238,7 @@ Loop_imc:
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

.globl _aes_hw_encrypt
@@ -242,6 +246,7 @@ Ldec_key_abort:

.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -272,6 +277,7 @@ Loop_enc:

.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -302,6 +308,8 @@ Loop_dec:

.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@@ -593,6 +601,8 @@ Lcbc_abort:

.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@@ -612,20 +622,34 @@ _aes_hw_ctr32_encrypt_blocks:
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo

// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register of which a
// single 32-bit lane has been updated the last time it was modified.
//
// This function uses a counter in one 32-bit lane. The vmov lines
// could write to v1.16b and v18.16b directly, but that trips this bug.
// We write to v6.16b and copy to the final register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
mov v6.s[3],w10
add w8, w8, #2
orr v1.16b,v6.16b,v6.16b
b.ls Lctr32_tail
rev w12, w8
mov v6.s[3],w12
sub x2,x2,#3 // bias
mov v18.s[3],w12
orr v18.16b,v6.16b,v6.16b
b Loop3x_ctr32

.align 4
@@ -652,11 +676,11 @@ Loop3x_ctr32:
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
add w9,w8,#1
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
rev w9,w9
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
@@ -665,8 +689,6 @@ Loop3x_ctr32:
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
@@ -681,21 +703,26 @@ Loop3x_ctr32:
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor v19.16b,v19.16b,v7.16b
rev w9,w9
mov v6.s[3], w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
orr v0.16b,v6.16b,v6.16b
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
mov v6.s[3], w10
rev w12,w8
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
orr v1.16b,v6.16b,v6.16b
mov v6.s[3], w12
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
orr v18.16b,v6.16b,v6.16b
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
@@ -35,6 +35,8 @@
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@@ -203,6 +205,7 @@ aes_hw_set_encrypt_key:
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
@@ -236,6 +239,7 @@ aes_hw_set_decrypt_key:
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
@@ -243,6 +247,7 @@ aes_hw_set_decrypt_key:
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -273,6 +278,7 @@ aes_hw_encrypt:
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -303,6 +309,8 @@ aes_hw_decrypt:
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@@ -594,6 +602,8 @@ aes_hw_cbc_encrypt:
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@@ -613,20 +623,34 @@ aes_hw_ctr32_encrypt_blocks:
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo

// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
// respectively, where the second instruction of an aese/aesmc
// instruction pair may execute twice if an interrupt is taken right
// after the first instruction consumes an input register of which a
// single 32-bit lane has been updated the last time it was modified.
//
// This function uses a counter in one 32-bit lane. The vmov lines
// could write to v1.16b and v18.16b directly, but that trips this bug.
// We write to v6.16b and copy to the final register as a workaround.
//
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
mov v6.s[3],w10
add w8, w8, #2
orr v1.16b,v6.16b,v6.16b
b.ls .Lctr32_tail
rev w12, w8
mov v6.s[3],w12
sub x2,x2,#3 // bias
mov v18.s[3],w12
orr v18.16b,v6.16b,v6.16b
b .Loop3x_ctr32

.align 4
@@ -653,11 +677,11 @@ aes_hw_ctr32_encrypt_blocks:
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
add w9,w8,#1
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
rev w9,w9
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
@@ -666,8 +690,6 @@ aes_hw_ctr32_encrypt_blocks:
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
@@ -682,21 +704,26 @@ aes_hw_ctr32_encrypt_blocks:
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
// 32-bit mode. See the comment above.
eor v19.16b,v19.16b,v7.16b
rev w9,w9
mov v6.s[3], w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
orr v0.16b,v6.16b,v6.16b
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
mov v6.s[3], w10
rev w12,w8
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
orr v1.16b,v6.16b,v6.16b
mov v6.s[3], w12
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
orr v18.16b,v6.16b,v6.16b
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
@@ -14,6 +14,8 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.text

.globl _bn_mul_mont
@@ -21,6 +23,7 @@

.align 5
_bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@@ -218,11 +221,14 @@ Lcond_copy:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret


.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
Lsqr8x_mont:
@@ -976,11 +982,16 @@ Lsqr8x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret


.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@@ -1414,6 +1425,8 @@ Lmul4x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret

.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@@ -15,6 +15,8 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.text

.globl bn_mul_mont
@@ -22,6 +24,7 @@
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@@ -219,11 +222,14 @@ bn_mul_mont:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.size bn_mul_mont,.-bn_mul_mont
.type __bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
@@ -977,11 +983,16 @@ __bn_sqr8x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
.type __bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@@ -1415,6 +1426,8 @@ __bn_mul4x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
@@ -101,26 +101,7 @@ void BN_free(BIGNUM *bn) {
}

void BN_clear_free(BIGNUM *bn) {
  char should_free;

  if (bn == NULL) {
    return;
  }

  if (bn->d != NULL) {
    if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
      OPENSSL_free(bn->d);
    } else {
      OPENSSL_cleanse(bn->d, bn->dmax * sizeof(bn->d[0]));
    }
  }

  should_free = (bn->flags & BN_FLG_MALLOCED) != 0;
  if (should_free) {
    OPENSSL_free(bn);
  } else {
    OPENSSL_cleanse(bn, sizeof(BIGNUM));
  }
  BN_free(bn);
}

BIGNUM *BN_dup(const BIGNUM *src) {
@ -302,6 +283,18 @@ int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
void bn_set_static_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
|
||||
if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
|
||||
OPENSSL_free(bn->d);
|
||||
}
|
||||
bn->d = (BN_ULONG *)words;
|
||||
|
||||
bn->width = num;
|
||||
bn->dmax = num;
|
||||
bn->neg = 0;
|
||||
bn->flags |= BN_FLG_STATIC_DATA;
|
||||
}
|
||||
|
||||
int bn_fits_in_words(const BIGNUM *bn, size_t num) {
|
||||
// All words beyond |num| must be zero.
|
||||
BN_ULONG mask = 0;
|
||||
|
|
|
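Editor's note: bn_set_static_words, added above, is a module-internal helper. A hedged usage sketch follows; it is not from the commit, the include path for the internal header is an assumption, and use_static_one is an illustrative name.

#include <CBigNumBoringSSL_bn.h>
#include "internal.h"  // declares bn_set_static_words (module-internal; assumption)

static const BN_ULONG kOneWords[] = {1};

void use_static_one(void) {
  BIGNUM one;
  BN_init(&one);
  bn_set_static_words(&one, kOneWords, 1);
  // |one| now aliases kOneWords without copying; BN_FLG_STATIC_DATA keeps
  // BN_free from releasing the constant words.
}
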
@ -64,10 +64,10 @@
#include "internal.h"


#if !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
// bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
// which must fit in a |BN_ULONG|.
static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
OPENSSL_UNUSED static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l,
                                            BN_ULONG d) {
  BN_ULONG dh, dl, q, ret = 0, th, tl, t;
  int i, count = 2;

@ -135,7 +135,6 @@ static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
  ret |= q;
  return ret;
}
#endif  // !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)

static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
                                    BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
@ -286,8 +285,10 @@ int BN_div(BIGNUM *quotient, BIGNUM *rem, const BIGNUM *numerator,
  // pointer to the 'top' of snum
  wnump = &(snum->d[num_n - 1]);

  // Setup to 'res'
  res->neg = (numerator->neg ^ divisor->neg);
  // Setup |res|. |numerator| and |res| may alias, so we save |numerator->neg|
  // for later.
  const int numerator_neg = numerator->neg;
  res->neg = (numerator_neg ^ divisor->neg);
  if (!bn_wexpand(res, loop + 1)) {
    goto err;
  }
@ -380,14 +381,11 @@ int BN_div(BIGNUM *quotient, BIGNUM *rem, const BIGNUM *numerator,
  bn_set_minimal_width(snum);

  if (rem != NULL) {
    // Keep a copy of the neg flag in numerator because if |rem| == |numerator|
    // |BN_rshift| will overwrite it.
    int neg = numerator->neg;
    if (!BN_rshift(rem, snum, norm_shift)) {
      goto err;
    }
    if (!BN_is_zero(rem)) {
      rem->neg = neg;
      rem->neg = numerator_neg;
    }
  }

@ -458,7 +456,7 @@ void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,

int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
                     const BIGNUM *numerator, const BIGNUM *divisor,
                     BN_CTX *ctx) {
                     unsigned divisor_min_bits, BN_CTX *ctx) {
  if (BN_is_negative(numerator) || BN_is_negative(divisor)) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
@ -498,8 +496,26 @@ int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
  r->neg = 0;

  // Incorporate |numerator| into |r|, one bit at a time, reducing after each
  // step. At the start of each loop iteration, |r| < |divisor|
  for (int i = numerator->width - 1; i >= 0; i--) {
  // step. We maintain the invariant that |0 <= r < divisor| and
  // |q * divisor + r = n| where |n| is the portion of |numerator| incorporated
  // so far.
  //
  // First, we short-circuit the loop: if we know |divisor| has at least
  // |divisor_min_bits| bits, the top |divisor_min_bits - 1| can be incorporated
  // without reductions. This significantly speeds up |RSA_check_key|. For
  // simplicity, we round down to a whole number of words.
  assert(divisor_min_bits <= BN_num_bits(divisor));
  int initial_words = 0;
  if (divisor_min_bits > 0) {
    initial_words = (divisor_min_bits - 1) / BN_BITS2;
    if (initial_words > numerator->width) {
      initial_words = numerator->width;
    }
    OPENSSL_memcpy(r->d, numerator->d + numerator->width - initial_words,
                   initial_words * sizeof(BN_ULONG));
  }

  for (int i = numerator->width - initial_words - 1; i >= 0; i--) {
    for (int bit = BN_BITS2 - 1; bit >= 0; bit--) {
      // Incorporate the next bit of the numerator, by computing
      // r = 2*r or 2*r + 1. Note the result fits in one more word. We store the
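
Editor's note: the invariant q*divisor + r = n in the hunk above is easiest to see in a scalar model. The following stand-alone sketch (not the BoringSSL code; div_bit_by_bit is an illustrative name) runs the same bit-at-a-time loop on uint64_t, with an ordinary branch where the real code uses constant-time selects.

#include <assert.h>
#include <stdint.h>

static void div_bit_by_bit(uint64_t n, uint64_t d, uint64_t *q, uint64_t *r) {
  assert(d != 0);
  *q = 0;
  *r = 0;
  for (int i = 63; i >= 0; i--) {
    // r = 2*r or 2*r + 1: since r < d on entry, the result fits in one more
    // bit, exactly as the comment in the hunk notes.
    *r = (*r << 1) | ((n >> i) & 1);
    *q <<= 1;
    if (*r >= d) {  // the real code does this with constant-time selects
      *r -= d;
      *q |= 1;
    }
  }
  // Invariant on exit: q*d + r == n and r < d.
}

int main(void) {
  uint64_t q, r;
  div_bit_by_bit(1000003, 97, &q, &r);
  assert(q == 1000003 / 97 && r == 1000003 % 97);
  return 0;
}
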
@ -157,10 +157,11 @@ int bn_lcm_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
  BN_CTX_start(ctx);
  unsigned shift;
  BIGNUM *gcd = BN_CTX_get(ctx);
  int ret = gcd != NULL &&
  int ret = gcd != NULL &&  //
            bn_mul_consttime(r, a, b, ctx) &&
            bn_gcd_consttime(gcd, &shift, a, b, ctx) &&
            bn_div_consttime(r, NULL, r, gcd, ctx) &&
            // |gcd| has a secret bit width.
            bn_div_consttime(r, NULL, r, gcd, /*divisor_min_bits=*/0, ctx) &&
            bn_rshift_secret_shift(r, r, shift, ctx);
  BN_CTX_end(ctx);
  return ret;

@ -123,7 +123,7 @@
#ifndef OPENSSL_HEADER_BN_INTERNAL_H
#define OPENSSL_HEADER_BN_INTERNAL_H

#include <CBigNumBoringSSL_base.h>
#include <CBigNumBoringSSL_bn.h>

#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
@ -241,6 +241,14 @@ void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
// least significant word first.
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);

// bn_set_static_words acts like |bn_set_words|, but doesn't copy the data. A
// flag is set on |bn| so that |BN_free| won't attempt to free the data.
//
// The |STATIC_BIGNUM| macro is probably a better solution for this outside of
// the FIPS module. Inside of the FIPS module that macro generates rel.ro data,
// which doesn't work with FIPS requirements.
void bn_set_static_words(BIGNUM *bn, const BN_ULONG *words, size_t num);

// bn_fits_in_words returns one if |bn| may be represented in |num| words, plus
// a sign bit, and zero otherwise.
int bn_fits_in_words(const BIGNUM *bn, size_t num);
@ -289,7 +297,7 @@ void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]);
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]);

// bn_sqr_comba8 sets |r| to |a|^2.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[4]);
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]);

// bn_sqr_comba4 sets |r| to |a|^2.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]);
@ -404,9 +412,19 @@ uint64_t bn_mont_n0(const BIGNUM *n);
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
                                BN_CTX *ctx);

#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
#if defined(_MSC_VER)
#if defined(OPENSSL_X86_64)
#define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
#elif defined(OPENSSL_AARCH64)
#define BN_UMULT_LOHI(low, high, a, b) \
  do {                                 \
    const BN_ULONG _a = (a);           \
    const BN_ULONG _b = (b);           \
    (low) = _a * _b;                   \
    (high) = __umulh(_a, _b);          \
  } while (0)
#endif
#endif  // _MSC_VER

#if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
#error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
@ -534,12 +552,15 @@ int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
// bn_div_consttime behaves like |BN_div|, but it rejects negative inputs and
// treats both inputs, including their magnitudes, as secret. It is, as a
// result, much slower than |BN_div| and should only be used for rare operations
// where Montgomery reduction is not available.
// where Montgomery reduction is not available. |divisor_min_bits| is a
// public lower bound for |BN_num_bits(divisor)|. When |divisor|'s bit width is
// public, this can speed up the operation.
//
// Note that |quotient->width| will be set pessimally to |numerator->width|.
OPENSSL_EXPORT int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
                                    const BIGNUM *numerator,
                                    const BIGNUM *divisor, BN_CTX *ctx);
                                    const BIGNUM *divisor,
                                    unsigned divisor_min_bits, BN_CTX *ctx);

// bn_is_relatively_prime checks whether GCD(|x|, |y|) is one. On success, it
// returns one and sets |*out_relatively_prime| to one if the GCD was one and

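Editor's note: the new MSVC/AArch64 branch of BN_UMULT_LOHI above pairs a plain 64x64 multiply with __umulh for the high half. A hedged portable model of the same contract, using the unsigned __int128 extension available on GCC/Clang (umult_lohi is an illustrative name, not a BoringSSL symbol):

#include <assert.h>
#include <stdint.h>

static void umult_lohi(uint64_t a, uint64_t b, uint64_t *low, uint64_t *high) {
  unsigned __int128 product = (unsigned __int128)a * b;
  *low = (uint64_t)product;
  *high = (uint64_t)(product >> 64);  // this is what __umulh(a, b) returns
}

int main(void) {
  uint64_t lo, hi;
  umult_lohi(0xffffffffffffffffULL, 0xffffffffffffffffULL, &lo, &hi);
  assert(lo == 1 && hi == 0xfffffffffffffffeULL);  // (2^64-1)^2
  return 0;
}
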
@ -115,10 +115,6 @@
#include "../../internal.h"


// The quick sieve algorithm approach to weeding out primes is Philip
// Zimmermann's, as implemented in PGP. I have had a read of his comments and
// implemented my own version.

// kPrimes contains the first 1024 primes.
static const uint16_t kPrimes[] = {
    2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37,
@ -363,6 +359,18 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add,
static int probable_prime_dh_safe(BIGNUM *rnd, int bits, const BIGNUM *add,
                                  const BIGNUM *rem, BN_CTX *ctx);

BN_GENCB *BN_GENCB_new(void) {
  BN_GENCB *callback = OPENSSL_malloc(sizeof(BN_GENCB));
  if (callback == NULL) {
    OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
    return NULL;
  }
  OPENSSL_memset(callback, 0, sizeof(BN_GENCB));
  return callback;
}

void BN_GENCB_free(BN_GENCB *callback) { OPENSSL_free(callback); }

void BN_GENCB_set(BN_GENCB *callback,
                  int (*f)(int event, int n, struct bn_gencb_st *),
                  void *arg) {

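Editor's note: BN_GENCB_new/BN_GENCB_free, added above, give callers a heap-allocated progress callback. A hedged usage sketch (not from the commit; on_progress and generate_prime are illustrative names, and the header name follows this vendored tree's prefix convention):

#include <CBigNumBoringSSL_bn.h>

static int on_progress(int event, int n, struct bn_gencb_st *cb) {
  (void)event;
  (void)n;
  (void)cb;
  return 1;  // returning 0 aborts the prime search
}

int generate_prime(BIGNUM *out) {
  BN_GENCB *cb = BN_GENCB_new();
  if (cb == NULL) {
    return 0;
  }
  BN_GENCB_set(cb, on_progress, NULL);
  int ok = BN_generate_prime_ex(out, 2048, /*safe=*/0, NULL, NULL, cb);
  BN_GENCB_free(cb);
  return ok;
}
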
@ -75,10 +75,8 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
    if (ret == NULL) {
      ret = BN_new();
    }
    if (ret == NULL) {
      goto end;
    }
    if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
    if (ret == NULL ||
        !BN_set_word(ret, BN_is_bit_set(a, 0))) {
      if (ret != in) {
        BN_free(ret);
      }
@ -88,17 +86,15 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  }

  OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
  return (NULL);
  return NULL;
}

if (BN_is_zero(a) || BN_is_one(a)) {
  if (ret == NULL) {
    ret = BN_new();
  }
  if (ret == NULL) {
    goto end;
  }
  if (!BN_set_word(ret, BN_is_one(a))) {
  if (ret == NULL ||
      !BN_set_word(ret, BN_is_one(a))) {
    if (ret != in) {
      BN_free(ret);
    }

@ -57,6 +57,7 @@
#include <CBigNumBoringSSL_cipher.h>

#include <assert.h>
#include <limits.h>
#include <string.h>

#include <CBigNumBoringSSL_err.h>
@ -224,7 +225,6 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,

  ctx->buf_len = 0;
  ctx->final_used = 0;
  ctx->block_mask = ctx->cipher->block_size - 1;
  return 1;
}

@ -238,16 +238,31 @@ int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
  return EVP_CipherInit_ex(ctx, cipher, impl, key, iv, 0);
}

// block_remainder returns the number of bytes to remove from |len| to get a
// multiple of |ctx|'s block size.
static int block_remainder(const EVP_CIPHER_CTX *ctx, int len) {
  // |block_size| must be a power of two.
  assert(ctx->cipher->block_size != 0);
  assert((ctx->cipher->block_size & (ctx->cipher->block_size - 1)) == 0);
  return len & (ctx->cipher->block_size - 1);
}

int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
                      const uint8_t *in, int in_len) {
  int i, j, bl;
  // Ciphers that use blocks may write up to |bl| extra bytes. Ensure the output
  // does not overflow |*out_len|.
  int bl = ctx->cipher->block_size;
  if (bl > 1 && in_len > INT_MAX - bl) {
    OPENSSL_PUT_ERROR(CIPHER, ERR_R_OVERFLOW);
    return 0;
  }

  if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
    i = ctx->cipher->cipher(ctx, out, in, in_len);
    if (i < 0) {
    int ret = ctx->cipher->cipher(ctx, out, in, in_len);
    if (ret < 0) {
      return 0;
    } else {
      *out_len = i;
      *out_len = ret;
    }
    return 1;
  }
@ -257,7 +272,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
    return in_len == 0;
  }

  if (ctx->buf_len == 0 && (in_len & ctx->block_mask) == 0) {
  if (ctx->buf_len == 0 && block_remainder(ctx, in_len) == 0) {
    if (ctx->cipher->cipher(ctx, out, in, in_len)) {
      *out_len = in_len;
      return 1;
@ -267,8 +282,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
    }
  }

  i = ctx->buf_len;
  bl = ctx->cipher->block_size;
  int i = ctx->buf_len;
  assert(bl <= (int)sizeof(ctx->buf));
  if (i != 0) {
    if (bl - i > in_len) {
@ -277,7 +291,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
      *out_len = 0;
      return 1;
    } else {
      j = bl - i;
      int j = bl - i;
      OPENSSL_memcpy(&ctx->buf[i], in, j);
      if (!ctx->cipher->cipher(ctx, out, ctx->buf, bl)) {
        return 0;
@ -291,7 +305,7 @@ int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
    *out_len = 0;
  }

  i = in_len & ctx->block_mask;
  i = block_remainder(ctx, in_len);
  in_len -= i;
  if (in_len > 0) {
    if (!ctx->cipher->cipher(ctx, out, in, in_len)) {
@ -353,8 +367,13 @@ int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {

int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
                      const uint8_t *in, int in_len) {
  int fix_len;
  unsigned int b;
  // Ciphers that use blocks may write up to |bl| extra bytes. Ensure the output
  // does not overflow |*out_len|.
  unsigned int b = ctx->cipher->block_size;
  if (b > 1 && in_len > INT_MAX - (int)b) {
    OPENSSL_PUT_ERROR(CIPHER, ERR_R_OVERFLOW);
    return 0;
  }

  if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
    int r = ctx->cipher->cipher(ctx, out, in, in_len);
@ -376,15 +395,12 @@ int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
    return EVP_EncryptUpdate(ctx, out, out_len, in, in_len);
  }

  b = ctx->cipher->block_size;
  assert(b <= sizeof(ctx->final));

  int fix_len = 0;
  if (ctx->final_used) {
    OPENSSL_memcpy(out, ctx->final, b);
    out += b;
    fix_len = 1;
  } else {
    fix_len = 0;
  }

  if (!EVP_EncryptUpdate(ctx, out, out_len, in, in_len)) {
@ -613,6 +629,18 @@ int EVP_DecryptInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
  return EVP_CipherInit(ctx, cipher, key, iv, 0);
}

int EVP_CipherFinal(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
  return EVP_CipherFinal_ex(ctx, out, out_len);
}

int EVP_EncryptFinal(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
  return EVP_EncryptFinal_ex(ctx, out, out_len);
}

int EVP_DecryptFinal(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
  return EVP_DecryptFinal_ex(ctx, out, out_len);
}

int EVP_add_cipher_alias(const char *a, const char *b) {
  return 1;
}

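Editor's note: block_remainder above relies on the identity len % block_size == len & (block_size - 1) for power-of-two block sizes, which is why the asserts are there. A trivial stand-alone self-check of that identity (not from the commit):

#include <assert.h>

int main(void) {
  for (int bs = 1; bs <= 64; bs <<= 1) {
    for (int len = 0; len < 1000; len++) {
      assert((len & (bs - 1)) == len % bs);
    }
  }
  return 0;
}
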
@ -68,6 +68,8 @@
OPENSSL_MSVC_PRAGMA(warning(push))
OPENSSL_MSVC_PRAGMA(warning(disable: 4702))  // Unreachable code.

#define AES_GCM_NONCE_LENGTH 12

#if defined(BSAES)
static void vpaes_ctr32_encrypt_blocks_with_bsaes(const uint8_t *in,
                                                  uint8_t *out, size_t blocks,
@ -139,10 +141,22 @@ typedef struct {

static int aes_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
                        const uint8_t *iv, int enc) {
  int ret, mode;
  int ret;
  EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
  const int mode = ctx->cipher->flags & EVP_CIPH_MODE_MASK;

  if (mode == EVP_CIPH_CTR_MODE) {
    switch (ctx->key_len) {
      case 16:
        boringssl_fips_inc_counter(fips_counter_evp_aes_128_ctr);
        break;

      case 32:
        boringssl_fips_inc_counter(fips_counter_evp_aes_256_ctr);
        break;
    }
  }

  mode = ctx->cipher->flags & EVP_CIPH_MODE_MASK;
  if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
    if (hwaes_capable()) {
      ret = aes_hw_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
@ -351,6 +365,17 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
  if (!iv && !key) {
    return 1;
  }

  switch (ctx->key_len) {
    case 16:
      boringssl_fips_inc_counter(fips_counter_evp_aes_128_gcm);
      break;

    case 32:
      boringssl_fips_inc_counter(fips_counter_evp_aes_256_gcm);
      break;
  }

  if (key) {
    OPENSSL_memset(&gctx->gcm, 0, sizeof(gctx->gcm));
    gctx->ctr = aes_ctr_set_key(&gctx->ks.ks, &gctx->gcm.gcm_key, NULL, key,
@ -630,7 +655,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_128_gcm_generic) {
  out->nid = NID_aes_128_gcm;
  out->block_size = 1;
  out->key_len = 16;
  out->iv_len = 12;
  out->iv_len = AES_GCM_NONCE_LENGTH;
  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@ -698,7 +723,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_192_gcm_generic) {
  out->nid = NID_aes_192_gcm;
  out->block_size = 1;
  out->key_len = 24;
  out->iv_len = 12;
  out->iv_len = AES_GCM_NONCE_LENGTH;
  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@ -766,7 +791,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_256_gcm_generic) {
  out->nid = NID_aes_256_gcm;
  out->block_size = 1;
  out->key_len = 32;
  out->iv_len = 12;
  out->iv_len = AES_GCM_NONCE_LENGTH;
  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
  out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
               EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@ -886,6 +911,16 @@ static int aead_aes_gcm_init_impl(struct aead_aes_gcm_ctx *gcm_ctx,
                                  size_t key_len, size_t tag_len) {
  const size_t key_bits = key_len * 8;

  switch (key_bits) {
    case 128:
      boringssl_fips_inc_counter(fips_counter_evp_aes_128_gcm);
      break;

    case 256:
      boringssl_fips_inc_counter(fips_counter_evp_aes_256_gcm);
      break;
  }

  if (key_bits != 128 && key_bits != 192 && key_bits != 256) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_KEY_LENGTH);
    return 0;  // EVP_AEAD_CTX_init should catch this.
@ -931,21 +966,19 @@ static int aead_aes_gcm_init(EVP_AEAD_CTX *ctx, const uint8_t *key,

static void aead_aes_gcm_cleanup(EVP_AEAD_CTX *ctx) {}

static int aead_aes_gcm_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
                                     uint8_t *out_tag, size_t *out_tag_len,
                                     size_t max_out_tag_len,
                                     const uint8_t *nonce, size_t nonce_len,
                                     const uint8_t *in, size_t in_len,
                                     const uint8_t *extra_in,
                                     size_t extra_in_len,
                                     const uint8_t *ad, size_t ad_len) {
  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *) &ctx->state;

  if (extra_in_len + ctx->tag_len < ctx->tag_len) {
static int aead_aes_gcm_seal_scatter_impl(
    const struct aead_aes_gcm_ctx *gcm_ctx,
    uint8_t *out, uint8_t *out_tag, size_t *out_tag_len, size_t max_out_tag_len,
    const uint8_t *nonce, size_t nonce_len,
    const uint8_t *in, size_t in_len,
    const uint8_t *extra_in, size_t extra_in_len,
    const uint8_t *ad, size_t ad_len,
    size_t tag_len) {
  if (extra_in_len + tag_len < tag_len) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
    return 0;
  }
  if (max_out_tag_len < extra_in_len + ctx->tag_len) {
  if (max_out_tag_len < extra_in_len + tag_len) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
    return 0;
  }
@ -989,18 +1022,35 @@ static int aead_aes_gcm_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
    }
  }

  CRYPTO_gcm128_tag(&gcm, out_tag + extra_in_len, ctx->tag_len);
  *out_tag_len = ctx->tag_len + extra_in_len;
  CRYPTO_gcm128_tag(&gcm, out_tag + extra_in_len, tag_len);
  *out_tag_len = tag_len + extra_in_len;

  return 1;
}

static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
                                    const uint8_t *nonce, size_t nonce_len,
                                    const uint8_t *in, size_t in_len,
                                    const uint8_t *in_tag, size_t in_tag_len,
                                    const uint8_t *ad, size_t ad_len) {
  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *) &ctx->state;
static int aead_aes_gcm_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
                                     uint8_t *out_tag, size_t *out_tag_len,
                                     size_t max_out_tag_len,
                                     const uint8_t *nonce, size_t nonce_len,
                                     const uint8_t *in, size_t in_len,
                                     const uint8_t *extra_in,
                                     size_t extra_in_len,
                                     const uint8_t *ad, size_t ad_len) {
  const struct aead_aes_gcm_ctx *gcm_ctx =
      (const struct aead_aes_gcm_ctx *)&ctx->state;
  return aead_aes_gcm_seal_scatter_impl(
      gcm_ctx, out, out_tag, out_tag_len, max_out_tag_len, nonce, nonce_len, in,
      in_len, extra_in, extra_in_len, ad, ad_len, ctx->tag_len);
}

static int aead_aes_gcm_open_gather_impl(const struct aead_aes_gcm_ctx *gcm_ctx,
                                         uint8_t *out,
                                         const uint8_t *nonce, size_t nonce_len,
                                         const uint8_t *in, size_t in_len,
                                         const uint8_t *in_tag,
                                         size_t in_tag_len,
                                         const uint8_t *ad, size_t ad_len,
                                         size_t tag_len) {
  uint8_t tag[EVP_AEAD_AES_GCM_TAG_LEN];

  if (nonce_len == 0) {
@ -1008,7 +1058,7 @@ static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
    return 0;
  }

  if (in_tag_len != ctx->tag_len) {
  if (in_tag_len != tag_len) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
    return 0;
  }
@ -1035,8 +1085,8 @@ static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
    }
  }

  CRYPTO_gcm128_tag(&gcm, tag, ctx->tag_len);
  if (CRYPTO_memcmp(tag, in_tag, ctx->tag_len) != 0) {
  CRYPTO_gcm128_tag(&gcm, tag, tag_len);
  if (CRYPTO_memcmp(tag, in_tag, tag_len) != 0) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
    return 0;
  }
@ -1044,11 +1094,22 @@ static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
  return 1;
}

static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
                                    const uint8_t *nonce, size_t nonce_len,
                                    const uint8_t *in, size_t in_len,
                                    const uint8_t *in_tag, size_t in_tag_len,
                                    const uint8_t *ad, size_t ad_len) {
  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *)&ctx->state;
  return aead_aes_gcm_open_gather_impl(gcm_ctx, out, nonce, nonce_len, in,
                                       in_len, in_tag, in_tag_len, ad, ad_len,
                                       ctx->tag_len);
}

DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 16;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1063,7 +1124,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_192_gcm) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 24;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1078,7 +1139,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 32;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1089,6 +1150,116 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm) {
  out->open_gather = aead_aes_gcm_open_gather;
}

static int aead_aes_gcm_init_randnonce(EVP_AEAD_CTX *ctx, const uint8_t *key,
                                       size_t key_len,
                                       size_t requested_tag_len) {
  if (requested_tag_len != EVP_AEAD_DEFAULT_TAG_LENGTH) {
    if (requested_tag_len < AES_GCM_NONCE_LENGTH) {
      OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
      return 0;
    }
    requested_tag_len -= AES_GCM_NONCE_LENGTH;
  }

  if (!aead_aes_gcm_init(ctx, key, key_len, requested_tag_len)) {
    return 0;
  }

  ctx->tag_len += AES_GCM_NONCE_LENGTH;
  return 1;
}

static int aead_aes_gcm_seal_scatter_randnonce(
    const EVP_AEAD_CTX *ctx,
    uint8_t *out, uint8_t *out_tag, size_t *out_tag_len, size_t max_out_tag_len,
    const uint8_t *external_nonce, size_t external_nonce_len,
    const uint8_t *in, size_t in_len,
    const uint8_t *extra_in, size_t extra_in_len,
    const uint8_t *ad, size_t ad_len) {
  if (external_nonce_len != 0) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_NONCE_SIZE);
    return 0;
  }

  uint8_t nonce[AES_GCM_NONCE_LENGTH];
  if (max_out_tag_len < sizeof(nonce)) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
    return 0;
  }

  RAND_bytes(nonce, sizeof(nonce));
  const struct aead_aes_gcm_ctx *gcm_ctx =
      (const struct aead_aes_gcm_ctx *)&ctx->state;
  if (!aead_aes_gcm_seal_scatter_impl(gcm_ctx, out, out_tag, out_tag_len,
                                      max_out_tag_len - AES_GCM_NONCE_LENGTH,
                                      nonce, sizeof(nonce), in, in_len,
                                      extra_in, extra_in_len, ad, ad_len,
                                      ctx->tag_len - AES_GCM_NONCE_LENGTH)) {
    return 0;
  }

  assert(*out_tag_len + sizeof(nonce) <= max_out_tag_len);
  memcpy(out_tag + *out_tag_len, nonce, sizeof(nonce));
  *out_tag_len += sizeof(nonce);

  return 1;
}

static int aead_aes_gcm_open_gather_randnonce(
    const EVP_AEAD_CTX *ctx, uint8_t *out,
    const uint8_t *external_nonce, size_t external_nonce_len,
    const uint8_t *in, size_t in_len,
    const uint8_t *in_tag, size_t in_tag_len,
    const uint8_t *ad, size_t ad_len) {
  if (external_nonce_len != 0) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_NONCE_SIZE);
    return 0;
  }

  if (in_tag_len < AES_GCM_NONCE_LENGTH) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
    return 0;
  }
  const uint8_t *nonce = in_tag + in_tag_len - AES_GCM_NONCE_LENGTH;

  const struct aead_aes_gcm_ctx *gcm_ctx =
      (const struct aead_aes_gcm_ctx *)&ctx->state;
  return aead_aes_gcm_open_gather_impl(
      gcm_ctx, out, nonce, AES_GCM_NONCE_LENGTH, in, in_len, in_tag,
      in_tag_len - AES_GCM_NONCE_LENGTH, ad, ad_len,
      ctx->tag_len - AES_GCM_NONCE_LENGTH);
}

DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm_randnonce) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 16;
  out->nonce_len = 0;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
  out->seal_scatter_supports_extra_in = 1;

  out->init = aead_aes_gcm_init_randnonce;
  out->cleanup = aead_aes_gcm_cleanup;
  out->seal_scatter = aead_aes_gcm_seal_scatter_randnonce;
  out->open_gather = aead_aes_gcm_open_gather_randnonce;
}

DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm_randnonce) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 32;
  out->nonce_len = 0;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
  out->seal_scatter_supports_extra_in = 1;

  out->init = aead_aes_gcm_init_randnonce;
  out->cleanup = aead_aes_gcm_cleanup;
  out->seal_scatter = aead_aes_gcm_seal_scatter_randnonce;
  out->open_gather = aead_aes_gcm_open_gather_randnonce;
}

struct aead_aes_gcm_tls12_ctx {
  struct aead_aes_gcm_ctx gcm_ctx;
  uint64_t min_next_nonce;
@ -1128,7 +1299,7 @@ static int aead_aes_gcm_tls12_seal_scatter(
  struct aead_aes_gcm_tls12_ctx *gcm_ctx =
      (struct aead_aes_gcm_tls12_ctx *) &ctx->state;

  if (nonce_len != 12) {
  if (nonce_len != AES_GCM_NONCE_LENGTH) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
    return 0;
  }
@ -1155,7 +1326,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm_tls12) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 16;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1170,7 +1341,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm_tls12) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 32;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1223,7 +1394,7 @@ static int aead_aes_gcm_tls13_seal_scatter(
  struct aead_aes_gcm_tls13_ctx *gcm_ctx =
      (struct aead_aes_gcm_tls13_ctx *) &ctx->state;

  if (nonce_len != 12) {
  if (nonce_len != AES_GCM_NONCE_LENGTH) {
    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
    return 0;
  }
@ -1261,7 +1432,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm_tls13) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 16;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;
@ -1276,7 +1447,7 @@ DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm_tls13) {
  memset(out, 0, sizeof(EVP_AEAD));

  out->key_len = 32;
  out->nonce_len = 12;
  out->nonce_len = AES_GCM_NONCE_LENGTH;
  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
  out->seal_scatter_supports_extra_in = 1;

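Editor's note: the randnonce AEADs added above advertise nonce_len of 0, draw the 12-byte nonce internally, and carry it after the GCM tag, so the total overhead is tag + nonce. A hedged usage sketch follows (not from the commit; seal_with_random_nonce is an illustrative name, and the aead header name is assumed from this tree's prefix convention):

#include <CBigNumBoringSSL_aead.h>  // assumed header name in this vendored tree

int seal_with_random_nonce(const uint8_t key[16], const uint8_t *msg,
                           size_t msg_len, uint8_t *out, size_t max_out,
                           size_t *out_len) {
  EVP_AEAD_CTX ctx;
  if (!EVP_AEAD_CTX_init(&ctx, EVP_aead_aes_128_gcm_randnonce(), key, 16,
                         EVP_AEAD_DEFAULT_TAG_LENGTH, NULL)) {
    return 0;
  }
  // nonce_len must be 0; the implementation draws 12 random bytes itself and
  // appends them after the GCM tag, so |max_out| needs msg_len + 16 + 12.
  int ok = EVP_AEAD_CTX_seal(&ctx, out, out_len, max_out,
                             /*nonce=*/NULL, /*nonce_len=*/0, msg, msg_len,
                             /*ad=*/NULL, /*ad_len=*/0);
  EVP_AEAD_CTX_cleanup(&ctx);
  return ok;
}
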
@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -14,6 +14,8 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.text

.globl _gcm_init_neon
@ -21,6 +23,7 @@

.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -46,6 +49,7 @@ _gcm_init_neon:

.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -65,6 +69,7 @@ _gcm_gmult_neon:

.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]

@ -15,6 +15,8 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.text

.globl gcm_init_neon
@ -22,6 +24,7 @@
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -47,6 +50,7 @@ gcm_init_neon:
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -66,6 +70,7 @@ gcm_gmult_neon:
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -16,6 +16,7 @@
#endif
#include <CBigNumBoringSSL_arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.code 32
@ -27,6 +28,7 @@
#endif
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
@ -69,8 +71,7 @@ _gcm_init_v8:
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]

vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
bx lr

.globl _gcm_gmult_v8
@ -80,6 +81,7 @@ _gcm_init_v8:
#endif
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
@ -124,6 +126,7 @@ _gcm_gmult_v8:
#endif
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ -255,6 +258,7 @@ Ldone_v8:
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)

@ -17,6 +17,7 @@
#endif
#include <CBigNumBoringSSL_arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.fpu neon
.code 32
@ -26,6 +27,7 @@
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
@ -68,8 +70,7 @@ gcm_init_v8:
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]

vst1.64 {q13,q14},[r0]! @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
@ -77,6 +78,7 @@ gcm_init_v8:
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
@ -119,6 +121,7 @@ gcm_gmult_v8:
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ -251,6 +254,7 @@ gcm_ghash_v8:
.align 2
.align 2
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)

@ -16,6 +16,7 @@
#endif
#include <CBigNumBoringSSL_arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

.globl _gcm_init_v8
@ -23,6 +24,7 @@

.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
@ -65,8 +67,48 @@ _gcm_init_v8:
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
//calculate H^3 and H^4
pmull v0.1q,v20.1d, v22.1d
pmull v5.1q,v22.1d,v22.1d
pmull2 v2.1q,v20.2d, v22.2d
pmull2 v7.1q,v22.2d,v22.2d
pmull v1.1q,v16.1d,v17.1d
pmull v6.1q,v17.1d,v17.1d

ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
ext v17.16b,v5.16b,v7.16b,#8
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v16.16b
eor v4.16b,v5.16b,v7.16b
eor v6.16b,v6.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
eor v6.16b,v6.16b,v4.16b
pmull v4.1q,v5.1d,v19.1d

ins v2.d[0],v1.d[1]
ins v7.d[0],v6.d[1]
ins v1.d[1],v0.d[0]
ins v6.d[1],v5.d[0]
eor v0.16b,v1.16b,v18.16b
eor v5.16b,v6.16b,v4.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
ext v4.16b,v5.16b,v5.16b,#8
pmull v0.1q,v0.1d,v19.1d
pmull v5.1q,v5.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v4.16b,v4.16b,v7.16b
eor v20.16b, v0.16b,v18.16b //H^3
eor v22.16b,v5.16b,v4.16b //H^4

ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
ext v17.16b,v22.16b,v22.16b,#8
eor v16.16b,v16.16b,v20.16b
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
ret

.globl _gcm_gmult_v8
@ -74,6 +116,7 @@ _gcm_init_v8:

.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -116,6 +159,9 @@ _gcm_gmult_v8:

.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
cmp x3,#64
b.hs Lgcm_ghash_v8_4x
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
@ -242,9 +288,290 @@ Ldone_v8:

ret

.align 4
gcm_ghash_v8_4x:
Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant

ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
ext v25.16b,v7.16b,v7.16b,#8
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8

pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
pmull2 v31.1q,v20.2d,v25.2d
pmull v30.1q,v21.1d,v7.1d

pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
pmull2 v6.1q,v21.2d,v6.2d

eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b

pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d

eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b

subs x3,x3,#128
b.lo Ltail4x

b Loop4x

.align 4
Loop4x:
eor v16.16b,v4.16b,v0.16b
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
ext v3.16b,v16.16b,v16.16b,#8
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif

pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
ext v25.16b,v7.16b,v7.16b,#8
pmull2 v1.1q,v27.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
ext v24.16b,v6.16b,v6.16b,#8
eor v1.16b,v1.16b,v30.16b
ext v23.16b,v5.16b,v5.16b,#8

ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
eor v1.16b,v1.16b,v17.16b
pmull2 v31.1q,v20.2d,v25.2d
eor v1.16b,v1.16b,v18.16b
pmull v30.1q,v21.1d,v7.1d

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
eor v0.16b,v1.16b,v18.16b
pmull2 v6.1q,v21.2d,v6.2d

eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v18.16b,v18.16b,v2.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d

eor v0.16b,v0.16b,v18.16b
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v30.16b,v30.16b,v5.16b

subs x3,x3,#64
b.hs Loop4x

Ltail4x:
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
pmull2 v1.1q,v27.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b

adds x3,x3,#64
b.eq Ldone4x

cmp x3,#32
b.lo Lone
b.eq Ltwo
Lthree:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d,v6.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b

pmull v29.1q,v20.1d,v24.1d //H·Ii+2
eor v6.16b,v6.16b,v24.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
pmull2 v31.1q,v20.2d,v24.2d
pmull v30.1q,v21.1d,v6.1d
eor v0.16b,v0.16b,v18.16b
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
eor v5.16b,v5.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8

pmull2 v23.1q,v22.2d,v23.2d
eor v16.16b,v4.16b,v0.16b
pmull2 v5.1q,v21.2d,v5.2d
ext v3.16b,v16.16b,v16.16b,#8

eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b

pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v26.2d,v3.2d
pmull v1.1q,v27.1d,v16.1d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b Ldone4x

.align 4
Ltwo:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

pmull v29.1q,v20.1d,v23.1d //H·Ii+1
eor v5.16b,v5.16b,v23.16b

eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull2 v31.1q,v20.2d,v23.2d
pmull v30.1q,v21.1d,v5.1d

pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v22.2d,v3.2d
pmull2 v1.1q,v21.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b Ldone4x

.align 4
Lone:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull v0.1q,v20.1d,v3.1d
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v20.2d,v3.2d
pmull v1.1q,v21.1d,v16.1d

Ldone4x:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
st1 {v0.2d},[x0] //write out Xi

ret

.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)

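Editor's note: the gcm_init_v8 hunk above now precomputes H^3 and H^4 so the new 4x loop can fold four blocks per reduction, using the identity X4 = C1*H^4 ^ C2*H^3 ^ C3*H^2 ^ C4*H. The following stand-alone C check of that identity is a hedged sketch (not from the commit): gf128_mul uses plain polynomial bit order with reduction polynomial x^128 + x^7 + x^2 + x + 1, not GCM's reflected representation, since the algebraic identity holds either way.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t hi, lo; } u128;

static u128 xor128(u128 a, u128 b) { return (u128){a.hi ^ b.hi, a.lo ^ b.lo}; }

// Multiplication in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1.
static u128 gf128_mul(u128 a, u128 b) {
  u128 r = {0, 0};
  for (int i = 0; i < 128; i++) {
    uint64_t bit = i < 64 ? (b.lo >> i) & 1 : (b.hi >> (i - 64)) & 1;
    if (bit) {
      r = xor128(r, a);
    }
    uint64_t carry = a.hi >> 63;  // a = a * x mod p
    a.hi = (a.hi << 1) | (a.lo >> 63);
    a.lo <<= 1;
    if (carry) {
      a.lo ^= 0x87;
    }
  }
  return r;
}

int main(void) {
  u128 h = {0x0123456789abcdefULL, 0xfedcba9876543210ULL};
  u128 c[4] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
  u128 h2 = gf128_mul(h, h), h3 = gf128_mul(h2, h), h4 = gf128_mul(h3, h);

  // Serial GHASH: X <- (X ^ C_i) * H, one block at a time.
  u128 x = {0, 0};
  for (int i = 0; i < 4; i++) {
    x = gf128_mul(xor128(x, c[i]), h);
  }

  // Aggregated form used by the 4x loop above.
  u128 agg = xor128(xor128(gf128_mul(c[0], h4), gf128_mul(c[1], h3)),
                    xor128(gf128_mul(c[2], h2), gf128_mul(c[3], h)));
  assert(agg.hi == x.hi && agg.lo == x.lo);
  printf("4-block aggregated GHASH matches the serial computation\n");
  return 0;
}
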
@ -17,6 +17,7 @@
|
|||
#endif
|
||||
#include <CBigNumBoringSSL_arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.globl gcm_init_v8
|
||||
|
@ -24,6 +25,7 @@
|
|||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
|
@ -66,8 +68,48 @@ gcm_init_v8:
|
|||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
|
||||
//calculate H^3 and H^4
|
||||
pmull v0.1q,v20.1d, v22.1d
|
||||
pmull v5.1q,v22.1d,v22.1d
|
||||
pmull2 v2.1q,v20.2d, v22.2d
|
||||
pmull2 v7.1q,v22.2d,v22.2d
|
||||
pmull v1.1q,v16.1d,v17.1d
|
||||
pmull v6.1q,v17.1d,v17.1d
|
||||
|
||||
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
ext v17.16b,v5.16b,v7.16b,#8
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v16.16b
|
||||
eor v4.16b,v5.16b,v7.16b
|
||||
eor v6.16b,v6.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
eor v6.16b,v6.16b,v4.16b
|
||||
pmull v4.1q,v5.1d,v19.1d
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v7.d[0],v6.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ins v6.d[1],v5.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v4.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
ext v4.16b,v5.16b,v5.16b,#8
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v5.1q,v5.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v4.16b,v4.16b,v7.16b
|
||||
eor v20.16b, v0.16b,v18.16b //H^3
|
||||
eor v22.16b,v5.16b,v4.16b //H^4
|
||||
|
||||
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
|
||||
ext v17.16b,v22.16b,v22.16b,#8
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
|
@ -75,6 +117,7 @@ gcm_init_v8:
|
|||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
|
@ -117,6 +160,9 @@ gcm_gmult_v8:
|
|||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cmp x3,#64
|
||||
b.hs .Lgcm_ghash_v8_4x
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
|
@ -243,10 +289,291 @@ gcm_ghash_v8:

ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.type gcm_ghash_v8_4x,%function
.align 4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
ld1 {v0.2d},[x0] //load [rotated] Xi
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant

ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif
ext v25.16b,v7.16b,v7.16b,#8
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8

pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
pmull2 v31.1q,v20.2d,v25.2d
pmull v30.1q,v21.1d,v7.1d

pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
pmull2 v6.1q,v21.2d,v6.2d

eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b

pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d

eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b

subs x3,x3,#128
b.lo .Ltail4x

b .Loop4x

.align 4
.Loop4x:
eor v16.16b,v4.16b,v0.16b
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
ext v3.16b,v16.16b,v16.16b,#8
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v7.16b,v7.16b
rev64 v4.16b,v4.16b
#endif

pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
ext v25.16b,v7.16b,v7.16b,#8
pmull2 v1.1q,v27.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
ext v24.16b,v6.16b,v6.16b,#8
eor v1.16b,v1.16b,v30.16b
ext v23.16b,v5.16b,v5.16b,#8

ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
eor v7.16b,v7.16b,v25.16b
eor v1.16b,v1.16b,v17.16b
pmull2 v31.1q,v20.2d,v25.2d
eor v1.16b,v1.16b,v18.16b
pmull v30.1q,v21.1d,v7.1d

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
eor v6.16b,v6.16b,v24.16b
pmull2 v24.1q,v22.2d,v24.2d
eor v0.16b,v1.16b,v18.16b
pmull2 v6.1q,v21.2d,v6.2d

eor v29.16b,v29.16b,v16.16b
eor v31.16b,v31.16b,v24.16b
eor v30.16b,v30.16b,v6.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
eor v5.16b,v5.16b,v23.16b
eor v18.16b,v18.16b,v2.16b
pmull2 v23.1q,v26.2d,v23.2d
pmull v5.1q,v27.1d,v5.1d

eor v0.16b,v0.16b,v18.16b
eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8
eor v30.16b,v30.16b,v5.16b

subs x3,x3,#64
b.hs .Loop4x

.Ltail4x:
eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v28.2d,v3.2d
pmull2 v1.1q,v27.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b

adds x3,x3,#64
b.eq .Ldone4x

cmp x3,#32
b.lo .Lone
b.eq .Ltwo
.Lthree:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d,v6.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v6.16b,v6.16b
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v24.16b,v6.16b,v6.16b,#8
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b

pmull v29.1q,v20.1d,v24.1d //H·Ii+2
eor v6.16b,v6.16b,v24.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
pmull2 v31.1q,v20.2d,v24.2d
pmull v30.1q,v21.1d,v6.1d
eor v0.16b,v0.16b,v18.16b
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
eor v5.16b,v5.16b,v23.16b
ext v0.16b,v0.16b,v0.16b,#8

pmull2 v23.1q,v22.2d,v23.2d
eor v16.16b,v4.16b,v0.16b
pmull2 v5.1q,v21.2d,v5.2d
ext v3.16b,v16.16b,v16.16b,#8

eor v29.16b,v29.16b,v7.16b
eor v31.16b,v31.16b,v23.16b
eor v30.16b,v30.16b,v5.16b

pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v26.2d,v3.2d
pmull v1.1q,v27.1d,v16.1d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x

.align 4
.Ltwo:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d,v5.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v5.16b,v5.16b
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v23.16b,v5.16b,v5.16b,#8
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

pmull v29.1q,v20.1d,v23.1d //H·Ii+1
eor v5.16b,v5.16b,v23.16b

eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull2 v31.1q,v20.2d,v23.2d
pmull v30.1q,v21.1d,v5.1d

pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v22.2d,v3.2d
pmull2 v1.1q,v21.2d,v16.2d

eor v0.16b,v0.16b,v29.16b
eor v2.16b,v2.16b,v31.16b
eor v1.16b,v1.16b,v30.16b
b .Ldone4x

.align 4
.Lone:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v4.2d},[x2]
eor v1.16b,v1.16b,v18.16b
#ifndef __ARMEB__
rev64 v4.16b,v4.16b
#endif

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

eor v16.16b,v4.16b,v0.16b
ext v3.16b,v16.16b,v16.16b,#8

pmull v0.1q,v20.1d,v3.1d
eor v16.16b,v16.16b,v3.16b
pmull2 v2.1q,v20.2d,v3.2d
pmull v1.1q,v21.1d,v16.1d

.Ldone4x:
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b

pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b

ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
ext v0.16b,v0.16b,v0.16b,#8

#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
st1 {v0.2d},[x0] //write out Xi

ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
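
For reference, the pmull-based code above implements multiplication in GF(2^128) with the GHASH reduction polynomial x^128 + x^7 + x^2 + x + 1 (the 0xe1 and shifted 0xc2 constants are its bit-reversed encodings). Below is a bit-serial C model of that semantics per NIST SP 800-38D; this is an illustrative sketch, the names are not from this tree, and the assembly computes the same products four blocks at a time with Karatsuba splitting.

#include <stdint.h>

typedef struct { uint64_t hi, lo; } u128_t;  // hi holds bits 127..64

static u128_t gf128_mul(u128_t x, u128_t y) {
  u128_t z = {0, 0}, v = x;
  for (int i = 0; i < 128; i++) {
    // Take bit i of y, most-significant bit first.
    uint64_t bit = i < 64 ? (y.hi >> (63 - i)) & 1 : (y.lo >> (127 - i)) & 1;
    if (bit) {
      z.hi ^= v.hi;
      z.lo ^= v.lo;
    }
    // Multiply v by x in the field: shift right one bit, reduce on wrap.
    uint64_t carry = v.lo & 1;
    v.lo = (v.lo >> 1) | (v.hi << 63);
    v.hi >>= 1;
    if (carry) {
      v.hi ^= UINT64_C(0xe100000000000000);
    }
  }
  return z;
}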
@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@ -52,20 +52,25 @@
#include <CBigNumBoringSSL_type_check.h>

#include "internal.h"
#include "../../internal.h"


void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
assert(key != NULL && ivec != NULL);
if (len == 0) {
// Avoid |ivec| == |iv| in the |memcpy| below, which is not legal in C.
return;
}

assert(in != NULL && out != NULL);
size_t n;
const uint8_t *iv = ivec;

assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));

while (len >= 16) {
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
for (n = 0; n < 16; n += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(
out + n, CRYPTO_load_word_le(in + n) ^ CRYPTO_load_word_le(iv + n));
}
(*block)(out, out, key);
iv = out;
@ -97,30 +102,36 @@ void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
size_t n;
union {
size_t t[16 / sizeof(size_t)];
uint8_t c[16];
} tmp;

assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
if (len == 0) {
// Avoid |ivec| == |iv| in the |memcpy| below, which is not legal in C.
return;
}

assert(in != NULL && out != NULL);

const uintptr_t inptr = (uintptr_t) in;
const uintptr_t outptr = (uintptr_t) out;
// If |in| and |out| alias, |in| must be ahead.
assert(inptr >= outptr || inptr + len <= outptr);

size_t n;
union {
crypto_word_t t[16 / sizeof(crypto_word_t)];
uint8_t c[16];
} tmp;

if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) {
// If |out| is at least two blocks behind |in| or completely disjoint, there
// is no need to decrypt to a temporary block.
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
OPENSSL_STATIC_ASSERT(16 % sizeof(crypto_word_t) == 0,
"block cannot be evenly divided into words");
const uint8_t *iv = ivec;
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n));
for (n = 0; n < 16; n += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + n, CRYPTO_load_word_le(out + n) ^
CRYPTO_load_word_le(iv + n));
}
iv = in;
len -= 16;

@ -129,16 +140,16 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
}
OPENSSL_memcpy(ivec, iv, 16);
} else {
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
OPENSSL_STATIC_ASSERT(16 % sizeof(crypto_word_t) == 0,
"block cannot be evenly divided into words");

while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
size_t c = load_word_le(in + n);
store_word_le(out + n,
tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n));
store_word_le(ivec + n, c);
for (n = 0; n < 16; n += sizeof(crypto_word_t)) {
crypto_word_t c = CRYPTO_load_word_le(in + n);
CRYPTO_store_word_le(out + n, tmp.t[n / sizeof(crypto_word_t)] ^
CRYPTO_load_word_le(ivec + n));
CRYPTO_store_word_le(ivec + n, c);
}
len -= 16;
in += 16;
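
As a usage sketch (a hypothetical caller, not part of this commit; the header name below follows this tree's CBigNumBoringSSL_ prefix convention): CRYPTO_cbc128_encrypt consumes whole 16-byte blocks and leaves the final ciphertext block in |ivec|, so back-to-back calls continue a single CBC stream. AES_encrypt has the block128_f shape, so it can serve as the block callback.

#include <CBigNumBoringSSL_aes.h>

// Hypothetical demo: CBC-encrypt 32 bytes with AES-128. |iv| is updated in
// place, so a second call would continue the same chain.
static void cbc_demo(const uint8_t key_bytes[16], uint8_t iv[16],
                     const uint8_t plain[32], uint8_t cipher[32]) {
  AES_KEY key;
  if (AES_set_encrypt_key(key_bytes, 128, &key) != 0) {
    return;  // key setup failed
  }
  CRYPTO_cbc128_encrypt(plain, cipher, 32, &key, iv, AES_encrypt);
}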
@ -72,10 +72,11 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t tmp = load_word_le(ivec + n) ^ load_word_le(in + n);
store_word_le(ivec + n, tmp);
store_word_le(out + n, tmp);
for (; n < 16; n += sizeof(crypto_word_t)) {
crypto_word_t tmp =
CRYPTO_load_word_le(ivec + n) ^ CRYPTO_load_word_le(in + n);
CRYPTO_store_word_le(ivec + n, tmp);
CRYPTO_store_word_le(out + n, tmp);
}
len -= 16;
out += 16;

@ -101,10 +102,10 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t t = load_word_le(in + n);
store_word_le(out + n, load_word_le(ivec + n) ^ t);
store_word_le(ivec + n, t);
for (; n < 16; n += sizeof(crypto_word_t)) {
crypto_word_t t = CRYPTO_load_word_le(in + n);
CRYPTO_store_word_le(out + n, CRYPTO_load_word_le(ivec + n) ^ t);
CRYPTO_store_word_le(ivec + n, t);
}
len -= 16;
out += 16;
@ -52,6 +52,7 @@
#include <string.h>

#include "internal.h"
#include "../../internal.h"


// NOTE: the IV/counter CTR mode is big-endian. The code itself

@ -69,8 +70,8 @@ static void ctr128_inc(uint8_t *counter) {
} while (n);
}

OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
OPENSSL_STATIC_ASSERT(16 % sizeof(crypto_word_t) == 0,
"block cannot be divided into crypto_word_t");

// The input encrypted as though 128bit counter mode is being used. The extra
// state information to record how much of the 128bit block we have used is

@ -102,9 +103,9 @@ void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
while (len >= 16) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n,
load_word_le(in + n) ^ load_word_le(ecount_buf + n));
for (n = 0; n < 16; n += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + n, CRYPTO_load_word_le(in + n) ^
CRYPTO_load_word_le(ecount_buf + n));
}
len -= 16;
out += 16;

@ -152,7 +153,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
n = (n + 1) % 16;
}

ctr32 = GETU32(ivec + 12);
ctr32 = CRYPTO_load_u32_be(ivec + 12);
while (len >= 16) {
size_t blocks = len / 16;
// 1<<28 is just a not-so-small yet not-so-large number...

@ -172,7 +173,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
}
(*func)(in, out, blocks, key, ivec);
// (*func) does not update ivec, caller does:
PUTU32(ivec + 12, ctr32);
CRYPTO_store_u32_be(ivec + 12, ctr32);
// ... overflow was detected, propogate carry.
if (ctr32 == 0) {
ctr96_inc(ivec);

@ -186,7 +187,7 @@ void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
OPENSSL_memset(ecount_buf, 0, 16);
(*func)(ecount_buf, ecount_buf, 1, key, ivec);
++ctr32;
PUTU32(ivec + 12, ctr32);
CRYPTO_store_u32_be(ivec + 12, ctr32);
if (ctr32 == 0) {
ctr96_inc(ivec);
}
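
The hunk above shows only the closing `} while (n);` of ctr128_inc; for orientation, the usual big-endian increment it performs looks like the following (a reconstructed sketch, so treat it as illustrative rather than a verbatim quote of the file):

// Add one to a 128-bit big-endian counter, carrying from the last byte
// toward the first.
static void ctr128_inc(uint8_t *counter) {
  uint32_t n = 16, c = 1;
  do {
    --n;
    c += counter[n];
    counter[n] = (uint8_t)c;
    c >>= 8;
  } while (n);
}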
@ -73,7 +73,7 @@ static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
static inline void gcm_reduce_1bit(u128 *V) {
if (sizeof(size_t) == 8) {
if (sizeof(crypto_word_t) == 8) {
uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->hi & 1));
V->hi = (V->lo << 63) | (V->hi >> 1);
V->lo = (V->lo >> 1) ^ T;

@ -377,9 +377,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + i,
CRYPTO_load_word_le(in + i) ^
ctx->EKi.t[i / sizeof(crypto_word_t)]);
}
out += 16;
in += 16;

@ -394,9 +395,10 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + i,
CRYPTO_load_word_le(in + i) ^
ctx->EKi.t[i / sizeof(crypto_word_t)]);
}
out += 16;
in += 16;

@ -468,9 +470,10 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + i,
CRYPTO_load_word_le(in + i) ^
ctx->EKi.t[i / sizeof(crypto_word_t)]);
}
out += 16;
in += 16;

@ -485,9 +488,10 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
for (size_t i = 0; i < 16; i += sizeof(crypto_word_t)) {
CRYPTO_store_word_le(out + i,
CRYPTO_load_word_le(in + i) ^
ctx->EKi.t[i / sizeof(crypto_word_t)]);
}
out += 16;
in += 16;
@ -193,7 +193,7 @@ static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
#endif // BORINGSSL_HAS_UINT128

void gcm_init_nohw(u128 Htable[16], const uint64_t Xi[2]) {
// We implement GHASH in terms of POLYVAL, as described in RFC8452. This
// We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
// avoids a shift by 1 in the multiplication, needed to account for bit
// reversal losing a bit after multiplication, that is,
// rev128(X) * rev128(Y) = rev255(X*Y).
@ -64,27 +64,6 @@ extern "C" {
#endif


static inline uint32_t GETU32(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return CRYPTO_bswap4(v);
}

static inline void PUTU32(void *out, uint32_t v) {
v = CRYPTO_bswap4(v);
OPENSSL_memcpy(out, &v, sizeof(v));
}

static inline size_t load_word_le(const void *in) {
size_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}

static inline void store_word_le(void *out, size_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}

// block128_f is the type of an AES block cipher implementation.
//
// Unlike upstream OpenSSL, it and the other functions in this file hard-code

@ -171,7 +150,7 @@ typedef struct {
uint64_t u[2];
uint32_t d[4];
uint8_t c[16];
size_t t[16 / sizeof(size_t)];
crypto_word_t t[16 / sizeof(crypto_word_t)];
} Yi, EKi, EK0, len, Xi;

// Note that the order of |Xi| and |gcm_key| is fixed by the MOVBE-based,
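
The removed GETU32/PUTU32 and load_word_le/store_word_le helpers are superseded by CRYPTO_load_u32_be, CRYPTO_store_u32_be, CRYPTO_load_word_le and CRYPTO_store_word_le, which live in crypto/internal.h and are not shown in this diff. Their presumed shape, mirroring the deleted helpers but over crypto_word_t, is:

static inline crypto_word_t CRYPTO_load_word_le(const void *in) {
  crypto_word_t v;
  // memcpy keeps the unaligned access well-defined.
  OPENSSL_memcpy(&v, in, sizeof(v));
  return v;
}

static inline void CRYPTO_store_word_le(void *out, crypto_word_t v) {
  OPENSSL_memcpy(out, &v, sizeof(v));
}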
@ -60,7 +60,8 @@ OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
block128_f block) {
assert(in && out && key && ivec && num);
assert(key != NULL && ivec != NULL && num != NULL);
assert(len == 0 || (in != NULL && out != NULL));

unsigned n = *num;

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
@ -36,16 +36,45 @@ extern "C" {
void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
const uint8_t user_additional_data[32]);

#if defined(BORINGSSL_FIPS)

// We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to whiten.
#define BORINGSSL_FIPS_OVERREAD 10

// CRYPTO_get_seed_entropy writes |out_entropy_len| bytes of entropy, suitable
// for seeding a DRBG, to |out_entropy|. It sets |*out_used_cpu| to one if the
// entropy came directly from the CPU and zero if it came from the OS. It
// actively obtains entropy from the CPU/OS and so should not be called from
// within the FIPS module.
void CRYPTO_get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
int *out_used_cpu);

// RAND_load_entropy supplies |entropy_len| bytes of entropy to the module. The
// |from_cpu| parameter is true iff the entropy was obtained directly from the
// CPU.
void RAND_load_entropy(const uint8_t *entropy, size_t entropy_len,
int from_cpu);

// RAND_need_entropy is implemented outside of the FIPS module and is called
// when the module has stopped because it has run out of entropy.
void RAND_need_entropy(size_t bytes_needed);

#endif // BORINGSSL_FIPS

// CRYPTO_sysrand fills |len| bytes at |buf| with entropy from the operating
// system.
void CRYPTO_sysrand(uint8_t *buf, size_t len);

#if defined(OPENSSL_URANDOM)
// CRYPTO_sysrand_for_seed fills |len| bytes at |buf| with entropy from the
// operating system. It may draw from the |GRND_RANDOM| pool on Android,
// depending on the vendor's configuration.
void CRYPTO_sysrand_for_seed(uint8_t *buf, size_t len);

#if defined(OPENSSL_URANDOM)
// CRYPTO_init_sysrand initializes long-lived resources needed to draw entropy
// from the operating system.
void CRYPTO_init_sysrand(void);

// CRYPTO_sysrand_if_available fills |len| bytes at |buf| with entropy from the
// operating system, or early /dev/urandom data, and returns 1, _if_ the entropy
// pool is initialized or if getrandom() is not available and not in FIPS mode.

@ -53,9 +82,7 @@ void CRYPTO_sysrand_for_seed(uint8_t *buf, size_t len);
// return 0.
int CRYPTO_sysrand_if_available(uint8_t *buf, size_t len);
#else
OPENSSL_INLINE void CRYPTO_sysrand_for_seed(uint8_t *buf, size_t len) {
CRYPTO_sysrand(buf, len);
}
OPENSSL_INLINE void CRYPTO_init_sysrand(void) {}

OPENSSL_INLINE int CRYPTO_sysrand_if_available(uint8_t *buf, size_t len) {
CRYPTO_sysrand(buf, len);
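
To illustrate the passive-entropy protocol these declarations define, here is a hypothetical out-of-module responder (not code from this commit): when the module runs dry it calls RAND_need_entropy, and the host answers by pushing bytes back in through RAND_load_entropy.

void RAND_need_entropy(size_t bytes_needed) {
  uint8_t buf[256];
  while (bytes_needed > 0) {
    size_t todo = bytes_needed < sizeof(buf) ? bytes_needed : sizeof(buf);
    int used_cpu;
    // Gather entropy outside the module, then hand it to the module.
    CRYPTO_get_seed_entropy(buf, todo, &used_cpu);
    RAND_load_entropy(buf, todo, used_cpu);
    bytes_needed -= todo;
  }
}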
@ -25,6 +25,7 @@
#include <CBigNumBoringSSL_chacha.h>
#include <CBigNumBoringSSL_cpu.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_type_check.h>

#include "internal.h"
#include "fork_detect.h"

@ -63,11 +64,11 @@ struct rand_thread_state {
// (re)seeded. This is bound by |kReseedInterval|.
unsigned calls;
// last_block_valid is non-zero iff |last_block| contains data from
// |CRYPTO_sysrand_for_seed|.
// |get_seed_entropy|.
int last_block_valid;

#if defined(BORINGSSL_FIPS)
// last_block contains the previous block from |CRYPTO_sysrand_for_seed|.
// last_block contains the previous block from |get_seed_entropy|.
uint8_t last_block[CRNGT_BLOCK_SIZE];
// next and prev form a NULL-terminated, double-linked list of all states in
// a process.

@ -82,16 +83,18 @@ struct rand_thread_state {
// called when the whole process is exiting.
DEFINE_BSS_GET(struct rand_thread_state *, thread_states_list);
DEFINE_STATIC_MUTEX(thread_states_list_lock);
DEFINE_STATIC_MUTEX(state_clear_all_lock);

static void rand_thread_state_clear_all(void) __attribute__((destructor));
static void rand_thread_state_clear_all(void) {
CRYPTO_STATIC_MUTEX_lock_write(thread_states_list_lock_bss_get());
CRYPTO_STATIC_MUTEX_lock_write(state_clear_all_lock_bss_get());
for (struct rand_thread_state *cur = *thread_states_list_bss_get();
cur != NULL; cur = cur->next) {
CTR_DRBG_clear(&cur->drbg);
}
// |thread_states_list_lock is deliberately left locked so that any threads
// that are still running will hang if they try to call |RAND_bytes|.
// The locks are deliberately left locked so that any threads that are still
// running will hang if they try to call |RAND_bytes|.
}
#endif

@ -146,12 +149,6 @@ static int rdrand(uint8_t *buf, const size_t len) {
OPENSSL_memcpy(buf + len_multiple8, rand_buf, remainder);
}

#if defined(BORINGSSL_FIPS_BREAK_CRNG)
// This breaks the "continuous random number generator test" defined in FIPS
// 140-2, section 4.9.2, and implemented in rand_get_seed().
OPENSSL_memset(buf, 0, len);
#endif

return 1;
}

@ -165,25 +162,97 @@ static int rdrand(uint8_t *buf, size_t len) {

#if defined(BORINGSSL_FIPS)

void CRYPTO_get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
int *out_used_cpu) {
*out_used_cpu = 0;
if (have_rdrand() && rdrand(out_entropy, out_entropy_len)) {
*out_used_cpu = 1;
} else {
CRYPTO_sysrand_for_seed(out_entropy, out_entropy_len);
}

#if defined(BORINGSSL_FIPS_BREAK_CRNG)
// This breaks the "continuous random number generator test" defined in FIPS
// 140-2, section 4.9.2, and implemented in |rand_get_seed|.
OPENSSL_memset(out_entropy, 0, out_entropy_len);
#endif
}

// In passive entropy mode, entropy is supplied from outside of the module via
// |RAND_load_entropy| and is stored in global instance of the following
// structure.

struct entropy_buffer {
// bytes contains entropy suitable for seeding a DRBG.
uint8_t bytes[CTR_DRBG_ENTROPY_LEN * BORINGSSL_FIPS_OVERREAD];
// bytes_valid indicates the number of bytes of |bytes| that contain valid
// data.
size_t bytes_valid;
// from_cpu is true if any of the contents of |bytes| were obtained directly
// from the CPU.
int from_cpu;
};

DEFINE_BSS_GET(struct entropy_buffer, entropy_buffer);
DEFINE_STATIC_MUTEX(entropy_buffer_lock);

void RAND_load_entropy(const uint8_t *entropy, size_t entropy_len,
int from_cpu) {
struct entropy_buffer *const buffer = entropy_buffer_bss_get();

CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
const size_t space = sizeof(buffer->bytes) - buffer->bytes_valid;
if (entropy_len > space) {
entropy_len = space;
}

OPENSSL_memcpy(&buffer->bytes[buffer->bytes_valid], entropy, entropy_len);
buffer->bytes_valid += entropy_len;
buffer->from_cpu |= from_cpu && (entropy_len != 0);
CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
}

// get_seed_entropy fills |out_entropy_len| bytes of |out_entropy| from the
// global |entropy_buffer|.
static void get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
int *out_used_cpu) {
struct entropy_buffer *const buffer = entropy_buffer_bss_get();
if (out_entropy_len > sizeof(buffer->bytes)) {
abort();
}

CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
while (buffer->bytes_valid < out_entropy_len) {
CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
RAND_need_entropy(out_entropy_len - buffer->bytes_valid);
CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
}

*out_used_cpu = buffer->from_cpu;
OPENSSL_memcpy(out_entropy, buffer->bytes, out_entropy_len);
OPENSSL_memmove(buffer->bytes, &buffer->bytes[out_entropy_len],
buffer->bytes_valid - out_entropy_len);
buffer->bytes_valid -= out_entropy_len;
if (buffer->bytes_valid == 0) {
buffer->from_cpu = 0;
}

CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
}

// rand_get_seed fills |seed| with entropy and sets |*out_used_cpu| to one if
// that entropy came directly from the CPU and zero otherwise.
static void rand_get_seed(struct rand_thread_state *state,
uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
uint8_t seed[CTR_DRBG_ENTROPY_LEN],
int *out_used_cpu) {
if (!state->last_block_valid) {
if (!have_rdrand() ||
!rdrand(state->last_block, sizeof(state->last_block))) {
CRYPTO_sysrand_for_seed(state->last_block, sizeof(state->last_block));
}
int unused;
get_seed_entropy(state->last_block, sizeof(state->last_block), &unused);
state->last_block_valid = 1;
}

// We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to
// whiten.
#define FIPS_OVERREAD 10
uint8_t entropy[CTR_DRBG_ENTROPY_LEN * FIPS_OVERREAD];

int used_rdrand = have_rdrand() && rdrand(entropy, sizeof(entropy));
if (!used_rdrand) {
CRYPTO_sysrand_for_seed(entropy, sizeof(entropy));
}
uint8_t entropy[CTR_DRBG_ENTROPY_LEN * BORINGSSL_FIPS_OVERREAD];
get_seed_entropy(entropy, sizeof(entropy), out_used_cpu);

// See FIPS 140-2, section 4.9.2. This is the “continuous random number
// generator test” which causes the program to randomly abort. Hopefully the

@ -193,6 +262,7 @@ static void rand_get_seed(struct rand_thread_state *state,
BORINGSSL_FIPS_abort();
}

OPENSSL_STATIC_ASSERT(sizeof(entropy) % CRNGT_BLOCK_SIZE == 0, "");
for (size_t i = CRNGT_BLOCK_SIZE; i < sizeof(entropy);
i += CRNGT_BLOCK_SIZE) {
if (CRYPTO_memcmp(entropy + i - CRNGT_BLOCK_SIZE, entropy + i,

@ -207,31 +277,24 @@ static void rand_get_seed(struct rand_thread_state *state,

OPENSSL_memcpy(seed, entropy, CTR_DRBG_ENTROPY_LEN);

for (size_t i = 1; i < FIPS_OVERREAD; i++) {
for (size_t i = 1; i < BORINGSSL_FIPS_OVERREAD; i++) {
for (size_t j = 0; j < CTR_DRBG_ENTROPY_LEN; j++) {
seed[j] ^= entropy[CTR_DRBG_ENTROPY_LEN * i + j];
}
}

#if defined(OPENSSL_URANDOM)
// If we used RDRAND, also opportunistically read from the system. This avoids
// solely relying on the hardware once the entropy pool has been initialized.
if (used_rdrand) {
CRYPTO_sysrand_if_available(entropy, CTR_DRBG_ENTROPY_LEN);
for (size_t i = 0; i < CTR_DRBG_ENTROPY_LEN; i++) {
seed[i] ^= entropy[i];
}
}
#endif
}

#else

// rand_get_seed fills |seed| with entropy and sets |*out_used_cpu| to one if
// that entropy came directly from the CPU and zero otherwise.
static void rand_get_seed(struct rand_thread_state *state,
uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
uint8_t seed[CTR_DRBG_ENTROPY_LEN],
int *out_used_cpu) {
// If not in FIPS mode, we don't overread from the system entropy source and
// we don't depend only on the hardware RDRAND.
CRYPTO_sysrand(seed, CTR_DRBG_ENTROPY_LEN);
CRYPTO_sysrand_for_seed(seed, CTR_DRBG_ENTROPY_LEN);
*out_used_cpu = 0;
}

#endif

@ -290,8 +353,23 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,

state->last_block_valid = 0;
uint8_t seed[CTR_DRBG_ENTROPY_LEN];
rand_get_seed(state, seed);
if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) {
int used_cpu;
rand_get_seed(state, seed, &used_cpu);

uint8_t personalization[CTR_DRBG_ENTROPY_LEN] = {0};
size_t personalization_len = 0;
#if defined(OPENSSL_URANDOM)
// If we used RDRAND, also opportunistically read from the system. This
// avoids solely relying on the hardware once the entropy pool has been
// initialized.
if (used_cpu &&
CRYPTO_sysrand_if_available(personalization, sizeof(personalization))) {
personalization_len = sizeof(personalization);
}
#endif

if (!CTR_DRBG_init(&state->drbg, seed, personalization,
personalization_len)) {
abort();
}
state->calls = 0;

@ -315,7 +393,8 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
if (state->calls >= kReseedInterval ||
state->fork_generation != fork_generation) {
uint8_t seed[CTR_DRBG_ENTROPY_LEN];
rand_get_seed(state, seed);
int used_cpu;
rand_get_seed(state, seed, &used_cpu);
#if defined(BORINGSSL_FIPS)
// Take a read lock around accesses to |state->drbg|. This is needed to
// avoid returning bad entropy if we race with

@ -325,7 +404,7 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
// bug on ppc64le. glibc may implement pthread locks by wrapping user code
// in a hardware transaction, but, on some older versions of glibc and the
// kernel, syscalls made with |syscall| did not abort the transaction.
CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get());
CRYPTO_STATIC_MUTEX_lock_read(state_clear_all_lock_bss_get());
#endif
if (!CTR_DRBG_reseed(&state->drbg, seed, NULL, 0)) {
abort();

@ -334,7 +413,7 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
state->fork_generation = fork_generation;
} else {
#if defined(BORINGSSL_FIPS)
CRYPTO_STATIC_MUTEX_lock_read(thread_states_list_lock_bss_get());
CRYPTO_STATIC_MUTEX_lock_read(state_clear_all_lock_bss_get());
#endif
}

@ -363,7 +442,7 @@ void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
}

#if defined(BORINGSSL_FIPS)
CRYPTO_STATIC_MUTEX_unlock_read(thread_states_list_lock_bss_get());
CRYPTO_STATIC_MUTEX_unlock_read(state_clear_all_lock_bss_get());
#endif
}
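
The overread-and-XOR fold in rand_get_seed condenses BORINGSSL_FIPS_OVERREAD seed-sized reads into one seed; XORing draws together can only add to an attacker's uncertainty, never reduce it. Restated as a standalone helper (an illustrative repackaging of the loop above, not new code in this commit):

static void xor_whiten(uint8_t seed[CTR_DRBG_ENTROPY_LEN],
                       const uint8_t entropy[CTR_DRBG_ENTROPY_LEN *
                                             BORINGSSL_FIPS_OVERREAD]) {
  OPENSSL_memcpy(seed, entropy, CTR_DRBG_ENTROPY_LEN);
  // Fold the remaining overread blocks into the first one.
  for (size_t i = 1; i < BORINGSSL_FIPS_OVERREAD; i++) {
    for (size_t j = 0; j < CTR_DRBG_ENTROPY_LEN; j++) {
      seed[j] ^= entropy[CTR_DRBG_ENTROPY_LEN * i + j];
    }
  }
}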
@ -62,6 +62,15 @@
#include <sys/random.h>
#endif

#if defined(OPENSSL_FREEBSD)
#define URANDOM_BLOCKS_FOR_ENTROPY
#if __FreeBSD__ >= 12
// getrandom is supported in FreeBSD 12 and up.
#define FREEBSD_GETRANDOM
#include <sys/random.h>
#endif
#endif

#include <CBigNumBoringSSL_thread.h>
#include <CBigNumBoringSSL_mem.h>

@ -95,17 +104,10 @@ static ssize_t boringssl_getrandom(void *buf, size_t buf_len, unsigned flags) {

#endif // USE_NR_getrandom

// rand_lock is used to protect the |*_requested| variables.
DEFINE_STATIC_MUTEX(rand_lock)

// The following constants are magic values of |urandom_fd|.
static const int kUnset = 0;
// kHaveGetrandom in |urandom_fd| signals that |getrandom| or |getentropy| is
// available and should be used instead.
static const int kHaveGetrandom = -3;

// urandom_fd_requested is set by |RAND_set_urandom_fd|. It's protected by
// |rand_lock|.
DEFINE_BSS_GET(int, urandom_fd_requested)

// urandom_fd is a file descriptor to /dev/urandom. It's protected by |once|.
DEFINE_BSS_GET(int, urandom_fd)

@ -144,14 +146,9 @@ static void maybe_set_extra_getrandom_flags(void) {
DEFINE_STATIC_ONCE(rand_once)

// init_once initializes the state of this module to values previously
// requested. This is the only function that modifies |urandom_fd| and
// |urandom_buffering|, whose values may be read safely after calling the
// once.
// requested. This is the only function that modifies |urandom_fd|, which may be
// read safely after calling the once.
static void init_once(void) {
CRYPTO_STATIC_MUTEX_lock_read(rand_lock_bss_get());
int fd = *urandom_fd_requested_bss_get();
CRYPTO_STATIC_MUTEX_unlock_read(rand_lock_bss_get());

#if defined(USE_NR_getrandom)
int have_getrandom;
uint8_t dummy;

@ -188,37 +185,27 @@ static void init_once(void) {
}
#endif

#if defined(FREEBSD_GETRANDOM)
*urandom_fd_bss_get() = kHaveGetrandom;
return;
#endif

// Android FIPS builds must support getrandom.
#if defined(BORINGSSL_FIPS) && defined(OPENSSL_ANDROID)
perror("getrandom not found");
abort();
#endif

if (fd == kUnset) {
do {
fd = open("/dev/urandom", O_RDONLY);
} while (fd == -1 && errno == EINTR);
}
int fd;
do {
fd = open("/dev/urandom", O_RDONLY);
} while (fd == -1 && errno == EINTR);

if (fd < 0) {
perror("failed to open /dev/urandom");
abort();
}

assert(kUnset == 0);
if (fd == kUnset) {
// Because we want to keep |urandom_fd| in the BSS, we have to initialise
// it to zero. But zero is a valid file descriptor too. Thus if open
// returns zero for /dev/urandom, we dup it to get a non-zero number.
fd = dup(fd);
close(kUnset);

if (fd <= 0) {
perror("failed to dup /dev/urandom fd");
abort();
}
}

int flags = fcntl(fd, F_GETFD);
if (flags == -1) {
// Native Client doesn't implement |fcntl|.

@ -283,11 +270,11 @@ static void wait_for_entropy(void) {
return;
}

#if defined(BORINGSSL_FIPS)
// In FIPS mode we ensure that the kernel has sufficient entropy before
// continuing. This is automatically handled by getrandom, which requires
// that the entropy pool has been initialised, but for urandom we have to
// poll.
#if defined(BORINGSSL_FIPS) && !defined(URANDOM_BLOCKS_FOR_ENTROPY)
// In FIPS mode on platforms where urandom doesn't block at startup, we ensure
// that the kernel has sufficient entropy before continuing. This is
// automatically handled by getrandom, which requires that the entropy pool
// has been initialised, but for urandom we have to poll.
for (;;) {
int entropy_bits;
if (ioctl(fd, RNDGETENTCNT, &entropy_bits)) {

@ -304,41 +291,7 @@ static void wait_for_entropy(void) {

usleep(250000);
}
#endif // BORINGSSL_FIPS
}

void RAND_set_urandom_fd(int fd) {
fd = dup(fd);
if (fd < 0) {
perror("failed to dup supplied urandom fd");
abort();
}

assert(kUnset == 0);
if (fd == kUnset) {
// Because we want to keep |urandom_fd| in the BSS, we have to initialise
// it to zero. But zero is a valid file descriptor too. Thus if dup
// returned zero we dup it again to get a non-zero number.
fd = dup(fd);
close(kUnset);

if (fd <= 0) {
perror("failed to dup supplied urandom fd");
abort();
}
}

CRYPTO_STATIC_MUTEX_lock_write(rand_lock_bss_get());
*urandom_fd_requested_bss_get() = fd;
CRYPTO_STATIC_MUTEX_unlock_write(rand_lock_bss_get());

CRYPTO_once(rand_once_bss_get(), init_once);
if (*urandom_fd_bss_get() == kHaveGetrandom) {
close(fd);
} else if (*urandom_fd_bss_get() != fd) {
fprintf(stderr, "RAND_set_urandom_fd called after initialisation.\n");
abort();
}
#endif // BORINGSSL_FIPS && !URANDOM_BLOCKS_FOR_ENTROPY
}

// fill_with_entropy writes |len| bytes of entropy into |out|. It returns one

@ -352,17 +305,20 @@ static int fill_with_entropy(uint8_t *out, size_t len, int block, int seed) {
return 1;
}

#if defined(USE_NR_getrandom)
#if defined(USE_NR_getrandom) || defined(FREEBSD_GETRANDOM)
int getrandom_flags = 0;
if (!block) {
getrandom_flags |= GRND_NONBLOCK;
}
#endif

#if defined (USE_NR_getrandom)
if (seed) {
getrandom_flags |= *extra_getrandom_flags_for_seed_bss_get();
}
#endif

CRYPTO_once(rand_once_bss_get(), init_once);
CRYPTO_init_sysrand();
if (block) {
CRYPTO_once(wait_for_entropy_once_bss_get(), wait_for_entropy);
}

@ -376,6 +332,8 @@ static int fill_with_entropy(uint8_t *out, size_t len, int block, int seed) {
if (*urandom_fd_bss_get() == kHaveGetrandom) {
#if defined(USE_NR_getrandom)
r = boringssl_getrandom(out, len, getrandom_flags);
#elif defined(FREEBSD_GETRANDOM)
r = getrandom(out, len, getrandom_flags);
#elif defined(OPENSSL_MACOS)
if (__builtin_available(macos 10.12, *)) {
// |getentropy| can only request 256 bytes at a time.

@ -409,6 +367,10 @@ static int fill_with_entropy(uint8_t *out, size_t len, int block, int seed) {
return 1;
}

void CRYPTO_init_sysrand(void) {
CRYPTO_once(rand_once_bss_get(), init_once);
}

// CRYPTO_sysrand puts |requested| random bytes into |out|.
void CRYPTO_sysrand(uint8_t *out, size_t requested) {
if (!fill_with_entropy(out, requested, /*block=*/1, /*seed=*/0)) {

@ -417,22 +379,13 @@ void CRYPTO_sysrand(uint8_t *out, size_t requested) {
}
}

#if defined(BORINGSSL_FIPS)
void CRYPTO_sysrand_for_seed(uint8_t *out, size_t requested) {
if (!fill_with_entropy(out, requested, /*block=*/1, /*seed=*/1)) {
perror("entropy fill failed");
abort();
}

#if defined(BORINGSSL_FIPS_BREAK_CRNG)
// This breaks the "continuous random number generator test" defined in FIPS
// 140-2, section 4.9.2, and implemented in rand_get_seed().
OPENSSL_memset(out, 0, requested);
#endif
}

#endif // BORINGSSL_FIPS

int CRYPTO_sysrand_if_available(uint8_t *out, size_t requested) {
if (fill_with_entropy(out, requested, /*block=*/0, /*seed=*/0)) {
return 1;
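
Reads from /dev/urandom in the path above must tolerate both EINTR and short reads. A minimal sketch of such a loop follows (an assumed helper for illustration; the file's actual fill_with_entropy additionally handles getrandom and getentropy):

#include <errno.h>
#include <unistd.h>

static int read_full(int fd, uint8_t *out, size_t len) {
  while (len > 0) {
    ssize_t r;
    do {
      r = read(fd, out, len);
    } while (r == -1 && errno == EINTR);
    if (r <= 0) {
      return 0;  // hard failure or unexpected EOF
    }
    out += (size_t)r;
    len -= (size_t)r;
  }
  return 1;
}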
@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
@ -19,11 +19,14 @@
.text


.private_extern _OPENSSL_armcap_P
.globl _sha1_block_data_order
.private_extern _sha1_block_data_order

.align 6
_sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else

@ -1091,6 +1094,8 @@ Loop:

.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -1229,8 +1234,6 @@ Lconst:
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)

@ -20,11 +20,14 @@
.text


.hidden OPENSSL_armcap_P
.globl sha1_block_data_order
.hidden sha1_block_data_order
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else

@ -1092,6 +1095,8 @@ sha1_block_data_order:
.type sha1_block_armv8,%function
.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
.Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -1230,8 +1235,6 @@ sha1_block_armv8:
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
@ -14,7 +14,7 @@
|
|||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
// this file except in compliance with the License. You can obtain a copy
|
||||
|
@ -42,6 +42,7 @@
|
|||
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
|
||||
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
|
||||
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
|
||||
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
|
||||
//
|
||||
// (*) Software SHA256 results are of lesser relevance, presented
|
||||
// mostly for informational purposes.
|
||||
|
@ -50,7 +51,7 @@
|
|||
// on Cortex-A53 (or by 4 cycles per round).
|
||||
// (***) Super-impressive coefficients over gcc-generated code are
|
||||
// indication of some compiler "pathology", most notably code
|
||||
// generated with -mgeneral-regs-only is significanty faster
|
||||
// generated with -mgeneral-regs-only is significantly faster
|
||||
// and the gap is only 40-90%.
|
||||
|
||||
#ifndef __KERNEL__
|
||||
|
@ -60,11 +61,13 @@
|
|||
.text
|
||||
|
||||
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
.globl _sha256_block_data_order
|
||||
.private_extern _sha256_block_data_order
|
||||
|
||||
.align 6
|
||||
_sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
|
@ -75,6 +78,7 @@ _sha256_block_data_order:
|
|||
tst w16,#ARMV8_SHA256
|
||||
b.ne Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
|
@ -99,7 +103,7 @@ Loop:
|
|||
ldr w19,[x30],#4 // *K++
|
||||
eor w28,w21,w22 // magic seed
|
||||
str x1,[x29,#112]
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w3,w3 // 0
|
||||
#endif
|
||||
ror w16,w24,#6
|
||||
|
@ -122,7 +126,7 @@ Loop:
|
|||
add w27,w27,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w27,w27,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w4,w4 // 1
|
||||
#endif
|
||||
ldp w5,w6,[x1],#2*4
|
||||
|
@ -147,7 +151,7 @@ Loop:
|
|||
add w26,w26,w19 // h+=Maj(a,b,c)
|
||||
ldr w19,[x30],#4 // *K++, w28 in next round
|
||||
//add w26,w26,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w5,w5 // 2
|
||||
#endif
|
||||
add w26,w26,w17 // h+=Sigma0(a)
|
||||
|
@ -171,7 +175,7 @@ Loop:
|
|||
add w25,w25,w28 // h+=Maj(a,b,c)
|
||||
ldr w28,[x30],#4 // *K++, w19 in next round
|
||||
//add w25,w25,w17 // h+=Sigma0(a)
|
||||
#ifndef __ARMEB__
|
||||
#ifndef __AARCH64EB__
|
||||
rev w6,w6 // 3
|
||||
#endif
|
||||
ldp w7,w8,[x1],#2*4
|
||||
|
@ -196,7 +200,7 @@ Loop:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w7,w7 // 4
#endif
add w24,w24,w17 // h+=Sigma0(a)

@ -220,7 +224,7 @@ Loop:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w8,w8 // 5
#endif
ldp w9,w10,[x1],#2*4

@ -245,7 +249,7 @@ Loop:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w9,w9 // 6
#endif
add w22,w22,w17 // h+=Sigma0(a)

@ -269,7 +273,7 @@ Loop:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w10,w10 // 7
#endif
ldp w11,w12,[x1],#2*4

@ -294,7 +298,7 @@ Loop:
add w20,w20,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w20,w20,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w11,w11 // 8
#endif
add w20,w20,w17 // h+=Sigma0(a)

@ -318,7 +322,7 @@ Loop:
add w27,w27,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w27,w27,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w12,w12 // 9
#endif
ldp w13,w14,[x1],#2*4

@ -343,7 +347,7 @@ Loop:
add w26,w26,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w26,w26,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w13,w13 // 10
#endif
add w26,w26,w17 // h+=Sigma0(a)

@ -367,7 +371,7 @@ Loop:
add w25,w25,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w25,w25,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w14,w14 // 11
#endif
ldp w15,w0,[x1],#2*4

@ -393,7 +397,7 @@ Loop:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w15,w15 // 12
#endif
add w24,w24,w17 // h+=Sigma0(a)

@ -418,7 +422,7 @@ Loop:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w0,w0 // 13
#endif
ldp w1,w2,[x1]

@ -444,7 +448,7 @@ Loop:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w1,w1 // 14
#endif
ldr w6,[sp,#12]

@ -470,7 +474,7 @@ Loop:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w2,w2 // 15
#endif
ldr w7,[sp,#0]

@ -1035,6 +1039,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -1069,6 +1074,7 @@ LK256:
.align 6
sha256_block_armv8:
Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -1204,10 +1210,6 @@ Loop_hw:
ldr x29,[sp],#16
ret

#endif
#ifndef __KERNEL__
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
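Every hunk above makes the same mechanical fix: the guard around the rev byte-reversal instructions becomes __AARCH64EB__, the predefine that AArch64 compilers actually set for big-endian targets, replacing the 32-bit-era __ARMEB__, which never fires on AArch64 and so left the swaps enabled even on big-endian builds. SHA-256 consumes its message words big-endian, which is what the swaps are for. A minimal C sketch of the same load, assuming a hypothetical load_be32 helper name:

#include <stdint.h>
#include <string.h>

// SHA-256 treats each 4-byte message word as big-endian; on a
// little-endian AArch64 target (__AARCH64EB__ undefined) the bytes
// must be reversed after loading, which is what the rev instructions
// in the hunks above do.
static uint32_t load_be32(const uint8_t *p) {
    uint32_t w;
    memcpy(&w, p, sizeof(w));
#if !defined(__AARCH64EB__)
    w = __builtin_bswap32(w);  // mirrors rev wN,wN
#endif
    return w;
}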
@ -15,7 +15,7 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy

@ -43,6 +43,7 @@
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.

@ -51,7 +52,7 @@
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.

#ifndef __KERNEL__

@ -61,11 +62,13 @@
.text

.hidden OPENSSL_armcap_P
.globl sha256_block_data_order
.hidden sha256_block_data_order
.type sha256_block_data_order,%function
.align 6
sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P

@ -76,6 +79,7 @@ sha256_block_data_order:
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
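The lines added to this prologue are BoringSSL's usual runtime dispatch: load the OPENSSL_armcap_P capability word (via adrp, using the :pg_hi21_nc: relocation under HWASan so pointer tag bits cannot break the page computation) and branch to the SHA-256-extension entry point .Lv8_entry when the ARMV8_SHA256 bit is set; AARCH64_SIGN_LINK_REGISTER and the AARCH64_VALIDATE_LINK_REGISTER seen before ret bracket the scalar path for pointer authentication. In C the dispatch amounts to the sketch below; the bit value and the two implementation names are illustrative placeholders, not BoringSSL's:

#include <stddef.h>
#include <stdint.h>

#define ARMV8_SHA256 (1u << 4)        /* capability bit, value assumed */

extern uint32_t OPENSSL_armcap_P;     /* filled in at startup from HWCAP */

/* Placeholder names standing in for the scalar loop and sha256_block_armv8. */
void sha256_block_scalar(uint32_t state[8], const uint8_t *in, size_t num);
void sha256_block_hw(uint32_t state[8], const uint8_t *in, size_t num);

void sha256_block_data_order_sketch(uint32_t state[8], const uint8_t *in,
                                    size_t num) {
    if (OPENSSL_armcap_P & ARMV8_SHA256) {
        sha256_block_hw(state, in, num);      /* b.ne .Lv8_entry */
    } else {
        sha256_block_scalar(state, in, num);
    }
}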
@ -100,7 +104,7 @@ sha256_block_data_order:
ldr w19,[x30],#4 // *K++
eor w28,w21,w22 // magic seed
str x1,[x29,#112]
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w3,w3 // 0
#endif
ror w16,w24,#6

@ -123,7 +127,7 @@ sha256_block_data_order:
add w27,w27,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w27,w27,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w4,w4 // 1
#endif
ldp w5,w6,[x1],#2*4

@ -148,7 +152,7 @@ sha256_block_data_order:
add w26,w26,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w26,w26,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w5,w5 // 2
#endif
add w26,w26,w17 // h+=Sigma0(a)

@ -172,7 +176,7 @@ sha256_block_data_order:
add w25,w25,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w25,w25,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w6,w6 // 3
#endif
ldp w7,w8,[x1],#2*4

@ -197,7 +201,7 @@ sha256_block_data_order:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w7,w7 // 4
#endif
add w24,w24,w17 // h+=Sigma0(a)

@ -221,7 +225,7 @@ sha256_block_data_order:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w8,w8 // 5
#endif
ldp w9,w10,[x1],#2*4

@ -246,7 +250,7 @@ sha256_block_data_order:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w9,w9 // 6
#endif
add w22,w22,w17 // h+=Sigma0(a)

@ -270,7 +274,7 @@ sha256_block_data_order:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w10,w10 // 7
#endif
ldp w11,w12,[x1],#2*4

@ -295,7 +299,7 @@ sha256_block_data_order:
add w20,w20,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w20,w20,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w11,w11 // 8
#endif
add w20,w20,w17 // h+=Sigma0(a)

@ -319,7 +323,7 @@ sha256_block_data_order:
add w27,w27,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w27,w27,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w12,w12 // 9
#endif
ldp w13,w14,[x1],#2*4

@ -344,7 +348,7 @@ sha256_block_data_order:
add w26,w26,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w26,w26,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w13,w13 // 10
#endif
add w26,w26,w17 // h+=Sigma0(a)

@ -368,7 +372,7 @@ sha256_block_data_order:
add w25,w25,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w25,w25,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w14,w14 // 11
#endif
ldp w15,w0,[x1],#2*4

@ -394,7 +398,7 @@ sha256_block_data_order:
add w24,w24,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w24,w24,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w15,w15 // 12
#endif
add w24,w24,w17 // h+=Sigma0(a)

@ -419,7 +423,7 @@ sha256_block_data_order:
add w23,w23,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w23,w23,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w0,w0 // 13
#endif
ldp w1,w2,[x1]

@ -445,7 +449,7 @@ sha256_block_data_order:
add w22,w22,w19 // h+=Maj(a,b,c)
ldr w19,[x30],#4 // *K++, w28 in next round
//add w22,w22,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w1,w1 // 14
#endif
ldr w6,[sp,#12]

@ -471,7 +475,7 @@ sha256_block_data_order:
add w21,w21,w28 // h+=Maj(a,b,c)
ldr w28,[x30],#4 // *K++, w19 in next round
//add w21,w21,w17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev w2,w2 // 15
#endif
ldr w7,[sp,#0]

@ -1036,6 +1040,7 @@ sha256_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha256_block_data_order,.-sha256_block_data_order

@ -1070,6 +1075,7 @@ sha256_block_data_order:
.align 6
sha256_block_armv8:
.Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -1206,10 +1212,6 @@ sha256_block_armv8:
ret
.size sha256_block_armv8,.-sha256_block_armv8
#endif
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
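These three hunks apply one fix to the x86_64 and i386 files: the generated-file banner moves from #-style to //-style comments, so after the .S file is run through the C preprocessor the banner cannot be mistaken for a preprocessor directive. The BORINGSSL_PREFIX define next to it drives symbol prefixing: the included *_prefix_symbols_asm.h is, conceptually, one #define per exported symbol, along the lines of the sketch below (the real header is generated; these two lines are assumed, not copied from it):

/* After preprocessing, the assembly defines and calls the prefixed
   names, letting this copy of BoringSSL coexist with another in the
   same binary. */
#define sha256_block_data_order CBigNumBoringSSL_sha256_block_data_order
#define sha512_block_data_order CBigNumBoringSSL_sha512_block_data_order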
@ -14,7 +14,7 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy

@ -42,6 +42,7 @@
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.

@ -50,7 +51,7 @@
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.

#ifndef __KERNEL__

@ -60,11 +61,24 @@
.text

.private_extern _OPENSSL_armcap_P
.globl _sha512_block_data_order
.private_extern _sha512_block_data_order

.align 6
_sha512_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else
adrp x16,_OPENSSL_armcap_P@PAGE
#endif
ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF]
tst w16,#ARMV8_SHA512
b.ne Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0

@ -89,7 +103,7 @@ Loop:
ldr x19,[x30],#8 // *K++
eor x28,x21,x22 // magic seed
str x1,[x29,#112]
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x3,x3 // 0
#endif
ror x16,x24,#14

@ -112,7 +126,7 @@ Loop:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x4,x4 // 1
#endif
ldp x5,x6,[x1],#2*8

@ -137,7 +151,7 @@ Loop:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x5,x5 // 2
#endif
add x26,x26,x17 // h+=Sigma0(a)

@ -161,7 +175,7 @@ Loop:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x6,x6 // 3
#endif
ldp x7,x8,[x1],#2*8

@ -186,7 +200,7 @@ Loop:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x7,x7 // 4
#endif
add x24,x24,x17 // h+=Sigma0(a)

@ -210,7 +224,7 @@ Loop:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x8,x8 // 5
#endif
ldp x9,x10,[x1],#2*8

@ -235,7 +249,7 @@ Loop:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x9,x9 // 6
#endif
add x22,x22,x17 // h+=Sigma0(a)

@ -259,7 +273,7 @@ Loop:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x10,x10 // 7
#endif
ldp x11,x12,[x1],#2*8

@ -284,7 +298,7 @@ Loop:
add x20,x20,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x20,x20,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x11,x11 // 8
#endif
add x20,x20,x17 // h+=Sigma0(a)

@ -308,7 +322,7 @@ Loop:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x12,x12 // 9
#endif
ldp x13,x14,[x1],#2*8

@ -333,7 +347,7 @@ Loop:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x13,x13 // 10
#endif
add x26,x26,x17 // h+=Sigma0(a)

@ -357,7 +371,7 @@ Loop:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x14,x14 // 11
#endif
ldp x15,x0,[x1],#2*8

@ -383,7 +397,7 @@ Loop:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x15,x15 // 12
#endif
add x24,x24,x17 // h+=Sigma0(a)

@ -408,7 +422,7 @@ Loop:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x0,x0 // 13
#endif
ldp x1,x2,[x1]

@ -434,7 +448,7 @@ Loop:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x1,x1 // 14
#endif
ldr x6,[sp,#24]

@ -460,7 +474,7 @@ Loop:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x2,x2 // 15
#endif
ldr x7,[sp,#0]

@ -1025,6 +1039,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -1077,9 +1092,526 @@ LK512:
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.text
#ifndef __KERNEL__
.comm _OPENSSL_armcap_P,4,4
.private_extern _OPENSSL_armcap_P

.align 6
sha512_block_armv8:
Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
adrp x3,LK512@PAGE
add x3,x3,LK512@PAGEOFF

rev64 v16.16b,v16.16b
rev64 v17.16b,v17.16b
rev64 v18.16b,v18.16b
rev64 v19.16b,v19.16b
rev64 v20.16b,v20.16b
rev64 v21.16b,v21.16b
rev64 v22.16b,v22.16b
rev64 v23.16b,v23.16b
b Loop_hw

.align 4
Loop_hw:
ld1 {v24.2d},[x3],#16
|
||||
subs x2,x2,#1
|
||||
sub x4,x1,#128
|
||||
orr v26.16b,v0.16b,v0.16b // offload
|
||||
orr v27.16b,v1.16b,v1.16b
|
||||
orr v28.16b,v2.16b,v2.16b
|
||||
orr v29.16b,v3.16b,v3.16b
|
||||
csel x1,x1,x4,ne // conditional rewind
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v16.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
rev64 v16.16b,v16.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v17.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
rev64 v17.16b,v17.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v18.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
rev64 v18.16b,v18.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v19.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
rev64 v19.16b,v19.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v20.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
rev64 v20.16b,v20.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
ld1 {v24.2d},[x3],#16
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v21.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
rev64 v21.16b,v21.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
ld1 {v25.2d},[x3],#16
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v22.16b},[x1],#16 // load next input
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
rev64 v22.16b,v22.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
sub x3,x3,#80*8 // rewind
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v23.16b},[x1],#16 // load next input
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
rev64 v23.16b,v23.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v0.2d,v0.2d,v26.2d // accumulate
|
||||
add v1.2d,v1.2d,v27.2d
|
||||
add v2.2d,v2.2d,v28.2d
|
||||
add v3.2d,v3.2d,v29.2d

cbnz x2,Loop_hw

st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context

ldr x29,[sp],#16
ret

#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
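The Apple file closing above adds sha512_block_armv8, in which every SHA-512 instruction is emitted as a raw .long word with the mnemonic kept in a comment (the ELF file below spells the same words with .inst), so the source assembles even with assemblers that predate the SHA-512 extension. With a current compiler the same operations are reachable from C through the ACLE intrinsics; a rough sketch of one message-schedule update, assuming a toolchain that accepts -march=armv8.2-a+sha3 and with the operand roles simplified:

#include <arm_neon.h>

/* Compile with e.g. -march=armv8.2-a+sha3. Each intrinsic maps to one
   of the instructions hand-encoded above: sha512su0/sha512su1 extend
   the message schedule with the sigma0/sigma1 terms. */
uint64x2_t sha512_schedule_step(uint64x2_t w0_1, uint64x2_t w2_3,
                                uint64x2_t w9_10, uint64x2_t w14_15) {
    uint64x2_t s = vsha512su0q_u64(w0_1, w2_3);  /* sha512su0 */
    return vsha512su1q_u64(s, w14_15, w9_10);    /* sha512su1 */
}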
@ -15,7 +15,7 @@
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy

@ -43,6 +43,7 @@
// Denver 2.01 10.5 (+26%) 6.70 (+8%)
// X-Gene 20.0 (+100%) 12.8 (+300%(***))
// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
// Kryo 1.92 17.4 (+30%) 11.2 (+8%)
//
// (*) Software SHA256 results are of lesser relevance, presented
// mostly for informational purposes.

@ -51,7 +52,7 @@
// on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
// indication of some compiler "pathology", most notably code
// generated with -mgeneral-regs-only is significanty faster
// generated with -mgeneral-regs-only is significantly faster
// and the gap is only 40-90%.

#ifndef __KERNEL__

@ -61,11 +62,24 @@
.text

.hidden OPENSSL_armcap_P
.globl sha512_block_data_order
.hidden sha512_block_data_order
.type sha512_block_data_order,%function
.align 6
sha512_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
adrp x16,OPENSSL_armcap_P
#endif
ldr w16,[x16,:lo12:OPENSSL_armcap_P]
tst w16,#ARMV8_SHA512
b.ne .Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0

@ -90,7 +104,7 @@ sha512_block_data_order:
ldr x19,[x30],#8 // *K++
eor x28,x21,x22 // magic seed
str x1,[x29,#112]
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x3,x3 // 0
#endif
ror x16,x24,#14

@ -113,7 +127,7 @@ sha512_block_data_order:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x4,x4 // 1
#endif
ldp x5,x6,[x1],#2*8

@ -138,7 +152,7 @@ sha512_block_data_order:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x5,x5 // 2
#endif
add x26,x26,x17 // h+=Sigma0(a)

@ -162,7 +176,7 @@ sha512_block_data_order:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x6,x6 // 3
#endif
ldp x7,x8,[x1],#2*8

@ -187,7 +201,7 @@ sha512_block_data_order:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x7,x7 // 4
#endif
add x24,x24,x17 // h+=Sigma0(a)

@ -211,7 +225,7 @@ sha512_block_data_order:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x8,x8 // 5
#endif
ldp x9,x10,[x1],#2*8

@ -236,7 +250,7 @@ sha512_block_data_order:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x9,x9 // 6
#endif
add x22,x22,x17 // h+=Sigma0(a)

@ -260,7 +274,7 @@ sha512_block_data_order:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x10,x10 // 7
#endif
ldp x11,x12,[x1],#2*8

@ -285,7 +299,7 @@ sha512_block_data_order:
add x20,x20,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x20,x20,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x11,x11 // 8
#endif
add x20,x20,x17 // h+=Sigma0(a)

@ -309,7 +323,7 @@ sha512_block_data_order:
add x27,x27,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x12,x12 // 9
#endif
ldp x13,x14,[x1],#2*8

@ -334,7 +348,7 @@ sha512_block_data_order:
add x26,x26,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x13,x13 // 10
#endif
add x26,x26,x17 // h+=Sigma0(a)

@ -358,7 +372,7 @@ sha512_block_data_order:
add x25,x25,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x14,x14 // 11
#endif
ldp x15,x0,[x1],#2*8

@ -384,7 +398,7 @@ sha512_block_data_order:
add x24,x24,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x15,x15 // 12
#endif
add x24,x24,x17 // h+=Sigma0(a)

@ -409,7 +423,7 @@ sha512_block_data_order:
add x23,x23,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x0,x0 // 13
#endif
ldp x1,x2,[x1]

@ -435,7 +449,7 @@ sha512_block_data_order:
add x22,x22,x19 // h+=Maj(a,b,c)
ldr x19,[x30],#8 // *K++, x28 in next round
//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x1,x1 // 14
#endif
ldr x6,[sp,#24]

@ -461,7 +475,7 @@ sha512_block_data_order:
add x21,x21,x28 // h+=Maj(a,b,c)
ldr x28,[x30],#8 // *K++, x19 in next round
//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev x2,x2 // 15
#endif
ldr x7,[sp,#0]

@ -1026,6 +1040,7 @@ sha512_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha512_block_data_order,.-sha512_block_data_order

@ -1078,9 +1093,526 @@ sha512_block_data_order:
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
.text
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
.type sha512_block_armv8,%function
.align 6
sha512_block_armv8:
.Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
adrp x3,.LK512
add x3,x3,:lo12:.LK512

rev64 v16.16b,v16.16b
rev64 v17.16b,v17.16b
rev64 v18.16b,v18.16b
rev64 v19.16b,v19.16b
rev64 v20.16b,v20.16b
rev64 v21.16b,v21.16b
rev64 v22.16b,v22.16b
rev64 v23.16b,v23.16b
b .Loop_hw

.align 4
.Loop_hw:
ld1 {v24.2d},[x3],#16
|
||||
subs x2,x2,#1
|
||||
sub x4,x1,#128
|
||||
orr v26.16b,v0.16b,v0.16b // offload
|
||||
orr v27.16b,v1.16b,v1.16b
|
||||
orr v28.16b,v2.16b,v2.16b
|
||||
orr v29.16b,v3.16b,v3.16b
|
||||
csel x1,x1,x4,ne // conditional rewind
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
||||
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v25.2d,v25.2d,v21.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
|
||||
ext v7.16b,v17.16b,v18.16b,#8
|
||||
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v24.2d,v24.2d,v22.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
|
||||
ext v7.16b,v18.16b,v19.16b,#8
|
||||
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v25.2d,v25.2d,v23.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
|
||||
ext v7.16b,v19.16b,v20.16b,#8
|
||||
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
|
||||
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
|
||||
add v0.2d,v3.2d,v4.2d // "D + T1"
|
||||
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
|
||||
add v24.2d,v24.2d,v16.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v0.16b,v1.16b,#8
|
||||
ext v6.16b,v2.16b,v0.16b,#8
|
||||
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
|
||||
ext v7.16b,v20.16b,v21.16b,#8
|
||||
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
|
||||
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
|
||||
add v3.2d,v2.2d,v1.2d // "D + T1"
|
||||
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
|
||||
add v25.2d,v25.2d,v17.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v3.16b,v0.16b,#8
|
||||
ext v6.16b,v4.16b,v3.16b,#8
|
||||
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
|
||||
ext v7.16b,v21.16b,v22.16b,#8
|
||||
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
|
||||
add v2.2d,v4.2d,v0.2d // "D + T1"
|
||||
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
|
||||
add v24.2d,v24.2d,v18.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v2.16b,v3.16b,#8
|
||||
ext v6.16b,v1.16b,v2.16b,#8
|
||||
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
|
||||
ext v7.16b,v22.16b,v23.16b,#8
|
||||
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
|
||||
add v4.2d,v1.2d,v3.2d // "D + T1"
|
||||
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
|
||||
add v25.2d,v25.2d,v19.2d
|
||||
ld1 {v24.2d},[x3],#16
|
||||
ext v25.16b,v25.16b,v25.16b,#8
|
||||
ext v5.16b,v4.16b,v2.16b,#8
|
||||
ext v6.16b,v0.16b,v4.16b,#8
|
||||
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
|
||||
ext v7.16b,v23.16b,v16.16b,#8
|
||||
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
|
||||
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
|
||||
add v1.2d,v0.2d,v2.2d // "D + T1"
|
||||
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
|
||||
add v24.2d,v24.2d,v20.2d
|
||||
ld1 {v25.2d},[x3],#16
|
||||
ext v24.16b,v24.16b,v24.16b,#8
|
||||
ext v5.16b,v1.16b,v4.16b,#8
|
||||
ext v6.16b,v3.16b,v1.16b,#8
|
||||
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
|
||||
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
|
||||
ext v7.16b,v16.16b,v17.16b,#8
|
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v24.2d,v24.2d,v16.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v25.2d,v25.2d,v17.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v24.2d,v24.2d,v18.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
add v25.2d,v25.2d,v19.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
add v24.2d,v24.2d,v20.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
add v25.2d,v25.2d,v21.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v24.2d,v24.2d,v22.2d
ld1 {v25.2d},[x3],#16
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
add v25.2d,v25.2d,v23.2d
ld1 {v24.2d},[x3],#16
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v16.2d
ld1 {v16.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
rev64 v16.16b,v16.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v17.2d
ld1 {v17.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
rev64 v17.16b,v17.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v18.2d
ld1 {v18.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
rev64 v18.16b,v18.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v19.2d
ld1 {v19.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v2.16b,v3.16b,#8
ext v6.16b,v1.16b,v2.16b,#8
add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
rev64 v19.16b,v19.16b
add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v20.2d
ld1 {v20.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v4.16b,v2.16b,#8
ext v6.16b,v0.16b,v4.16b,#8
add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
rev64 v20.16b,v20.16b
add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
ld1 {v24.2d},[x3],#16
add v25.2d,v25.2d,v21.2d
ld1 {v21.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v1.16b,v4.16b,#8
ext v6.16b,v3.16b,v1.16b,#8
add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
rev64 v21.16b,v21.16b
add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
ld1 {v25.2d},[x3],#16
add v24.2d,v24.2d,v22.2d
ld1 {v22.16b},[x1],#16 // load next input
ext v24.16b,v24.16b,v24.16b,#8
ext v5.16b,v0.16b,v1.16b,#8
ext v6.16b,v2.16b,v0.16b,#8
add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
rev64 v22.16b,v22.16b
add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
sub x3,x3,#80*8 // rewind
add v25.2d,v25.2d,v23.2d
ld1 {v23.16b},[x1],#16 // load next input
ext v25.16b,v25.16b,v25.16b,#8
ext v5.16b,v3.16b,v0.16b,#8
ext v6.16b,v4.16b,v3.16b,#8
add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
rev64 v23.16b,v23.16b
add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
add v0.2d,v0.2d,v26.2d // accumulate
add v1.2d,v1.2d,v27.2d
add v2.2d,v2.2d,v28.2d
add v3.2d,v3.2d,v29.2d

cbnz x2,.Loop_hw

st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context

ldr x29,[sp],#16
ret
.size sha512_block_armv8,.-sha512_block_armv8
#endif
#endif
#endif // !OPENSSL_NO_ASM

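The .inst words in this listing are raw encodings of the ARMv8 SHA-512 extension instructions (sha512h, sha512h2, sha512su0, sha512su1), emitted as literal words so that assemblers without SHA-512 support can still build the file. For orientation only, here is a minimal portable C sketch of the scalar round those instructions accelerate; the helper names follow FIPS 180-4 and are not part of the generated file.

#include <stdint.h>

// Sketch only: one scalar SHA-512 round. s[0..7] holds the working variables
// a..h; |k| is K512[i] and |w| is the message schedule word W[i].
static inline uint64_t rotr64(uint64_t x, int s) {
  return (x >> s) | (x << (64 - s));
}

static void sha512_round(uint64_t s[8], uint64_t k, uint64_t w) {
  uint64_t S1 = rotr64(s[4], 14) ^ rotr64(s[4], 18) ^ rotr64(s[4], 41);
  uint64_t ch = (s[4] & s[5]) ^ (~s[4] & s[6]);
  uint64_t t1 = s[7] + S1 + ch + k + w;  // the "T1 + H + K512[i]" term above
  uint64_t S0 = rotr64(s[0], 28) ^ rotr64(s[0], 34) ^ rotr64(s[0], 39);
  uint64_t maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);
  s[7] = s[6]; s[6] = s[5]; s[5] = s[4];
  s[4] = s[3] + t1;                      // the "D + T1" term above
  s[3] = s[2]; s[2] = s[1]; s[1] = s[0];
  s[0] = t1 + S0 + maj;
}

The vector code above processes roughly two such rounds per sha512h/sha512h2 pair and folds the message-schedule update into sha512su0/sha512su1.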
@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -14,6 +14,8 @@

#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.section __TEXT,__const

@ -216,6 +218,7 @@ Lenc_entry:

.align 4
_vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -225,6 +228,7 @@ _vpaes_encrypt:

st1 {v0.16b}, [x1]

ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -453,6 +457,7 @@ Ldec_entry:

.align 4
_vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -462,6 +467,7 @@ _vpaes_decrypt:

st1 {v0.16b}, [x1]

ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -631,6 +637,7 @@ _vpaes_key_preheat:

.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0

@ -800,6 +807,7 @@ Lschedule_mangle_last_dec:

eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -1002,7 +1010,7 @@ Lschedule_mangle_dec:

Lschedule_mangle_both:
tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3
add x8, x8, #64-16 // add $-16, %r8
add x8, x8, #48 // add $-16, %r8
and x8, x8, #~(1<<6) // and $0x30, %r8
st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx)
ret

@ -1013,6 +1021,7 @@ Lschedule_mangle_both:

.align 4
_vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1028,6 +1037,7 @@ _vpaes_set_encrypt_key:

ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

@ -1036,6 +1046,7 @@ _vpaes_set_encrypt_key:

.align 4
_vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1055,6 +1066,7 @@ _vpaes_set_decrypt_key:

ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

.globl _vpaes_cbc_encrypt

@ -1062,6 +1074,7 @@ _vpaes_set_decrypt_key:

.align 4
_vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt

@ -1089,12 +1102,15 @@ Lcbc_enc_loop:

ldp x29,x30,[sp],#16
Lcbc_abort:
AARCH64_VALIDATE_LINK_REGISTER
ret

.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1136,6 +1152,7 @@ Lcbc_dec_done:

ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

.globl _vpaes_ctr32_encrypt_blocks

@ -1143,6 +1160,7 @@ Lcbc_dec_done:

.align 4
_vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1210,6 +1228,7 @@ Lctr32_done:

ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret

#endif // !OPENSSL_NO_ASM

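The change repeated throughout these vpaes hunks is the insertion of AARCH64_SIGN_LINK_REGISTER and AARCH64_VALIDATE_LINK_REGISTER at function entry and exit, enabling return-address signing (ARMv8.3 pointer authentication) where the toolchain requests it; the new #include of <CBigNumBoringSSL_arm_arch.h> is what pulls those macros in. The other edit, add x8, x8, #64-16 becoming add x8, x8, #48, does not change the immediate at all (64 - 16 = 48); it merely stops relying on assembler constant folding. A rough sketch of the macros' shape (the vendored arm_arch.h may differ in detail) is:

// Sketch, not the verbatim vendored header. The hint encodings are NOPs on
// cores without pointer authentication, so one binary runs everywhere.
#if defined(__ARM_FEATURE_PAC_DEFAULT)
#define AARCH64_SIGN_LINK_REGISTER hint #25      // paciasp: sign x30 with SP
#define AARCH64_VALIDATE_LINK_REGISTER hint #29  // autiasp: authenticate x30
#else
#define AARCH64_SIGN_LINK_REGISTER               // expands to nothing
#define AARCH64_VALIDATE_LINK_REGISTER
#endif

Note that vpaes_cbc_decrypt deliberately omits the signing macro: as its comment says, it is only reached by a branch from vpaes_cbc_encrypt, which has already signed the return address, and signing twice would corrupt it.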
@ -15,6 +15,8 @@

#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>

.section .rodata

.type _vpaes_consts,%object

@ -217,6 +219,7 @@ _vpaes_encrypt_core:

.type vpaes_encrypt,%function
.align 4
vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -226,6 +229,7 @@ vpaes_encrypt:

st1 {v0.16b}, [x1]

ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_encrypt,.-vpaes_encrypt

@ -454,6 +458,7 @@ _vpaes_decrypt_core:

.type vpaes_decrypt,%function
.align 4
vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0

@ -463,6 +468,7 @@ vpaes_decrypt:

st1 {v0.16b}, [x1]

ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_decrypt,.-vpaes_decrypt

@ -632,6 +638,7 @@ _vpaes_key_preheat:

.type _vpaes_schedule_core,%function
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0

@ -801,6 +808,7 @@ _vpaes_schedule_core:

eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core

@ -1003,7 +1011,7 @@ _vpaes_schedule_mangle:

.Lschedule_mangle_both:
tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3
add x8, x8, #64-16 // add $-16, %r8
add x8, x8, #48 // add $-16, %r8
and x8, x8, #~(1<<6) // and $0x30, %r8
st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx)
ret

@ -1014,6 +1022,7 @@ _vpaes_schedule_mangle:

.type vpaes_set_encrypt_key,%function
.align 4
vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1029,6 +1038,7 @@ vpaes_set_encrypt_key:

ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key

@ -1037,6 +1047,7 @@ vpaes_set_encrypt_key:

.type vpaes_set_decrypt_key,%function
.align 4
vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1056,6 +1067,7 @@ vpaes_set_decrypt_key:

ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_cbc_encrypt

@ -1063,6 +1075,7 @@ vpaes_set_decrypt_key:

.type vpaes_cbc_encrypt,%function
.align 4
vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, .Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt

@ -1090,12 +1103,15 @@ vpaes_cbc_encrypt:

ldp x29,x30,[sp],#16
.Lcbc_abort:
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt

.type vpaes_cbc_decrypt,%function
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1137,6 +1153,7 @@ vpaes_cbc_decrypt:

ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
.globl vpaes_ctr32_encrypt_blocks

@ -1144,6 +1161,7 @@ vpaes_cbc_decrypt:

.type vpaes_ctr32_encrypt_blocks,%function
.align 4
vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so

@ -1211,6 +1229,7 @@ vpaes_ctr32_encrypt_blocks:

ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
#endif

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -1,7 +1,7 @@

#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)

@ -109,6 +109,7 @@

#ifndef OPENSSL_HEADER_CRYPTO_INTERNAL_H
#define OPENSSL_HEADER_CRYPTO_INTERNAL_H

#include <CBigNumBoringSSL_crypto.h>
#include <CBigNumBoringSSL_ex_data.h>
#include <CBigNumBoringSSL_stack.h>
#include <CBigNumBoringSSL_thread.h>

@ -208,6 +209,9 @@ typedef __uint128_t uint128_t;

#define OPENSSL_SSE2
#endif


// Pointer utility functions.

// buffers_alias returns one if |a| and |b| alias and zero otherwise.
static inline int buffers_alias(const uint8_t *a, size_t a_len,
const uint8_t *b, size_t b_len) {

@ -220,6 +224,23 @@ static inline int buffers_alias(const uint8_t *a, size_t a_len,

return a_u + a_len > b_u && b_u + b_len > a_u;
}

// align_pointer returns |ptr|, advanced to |alignment|. |alignment| must be a
// power of two, and |ptr| must have at least |alignment - 1| bytes of scratch
// space.
static inline void *align_pointer(void *ptr, size_t alignment) {
// |alignment| must be a power of two.
assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
// Instead of aligning |ptr| as a |uintptr_t| and casting back, compute the
// offset and advance in pointer space. C guarantees that casting from pointer
// to |uintptr_t| and back gives the same pointer, but general
// integer-to-pointer conversions are implementation-defined. GCC does define
// it in the useful way, but this makes fewer assumptions.
uintptr_t offset = (0u - (uintptr_t)ptr) & (alignment - 1);
ptr = (char *)ptr + offset;
assert(((uintptr_t)ptr & (alignment - 1)) == 0);
return ptr;
}
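A usage sketch for align_pointer (hypothetical names, not code from this diff): to hand out a 64-byte-aligned region, over-allocate by alignment - 1 bytes and advance.

// Sketch: the caller keeps |*out_storage| for freeing; the returned pointer
// is the aligned view into the same block.
static uint8_t *alloc_aligned64(size_t len, uint8_t **out_storage) {
  uint8_t *storage = OPENSSL_malloc(len + 63);  // 63 bytes of scratch space
  if (storage == NULL) {
    return NULL;
  }
  *out_storage = storage;
  return align_pointer(storage, 64);
}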

// Constant-time utility functions.
//

@ -470,6 +491,13 @@ OPENSSL_EXPORT void CRYPTO_once(CRYPTO_once_t *once, void (*init)(void));

// Reference counting.

// Automatically enable C11 atomics if implemented.
#if !defined(OPENSSL_C11_ATOMIC) && defined(OPENSSL_THREADS) && \
!defined(__STDC_NO_ATOMICS__) && defined(__STDC_VERSION__) && \
__STDC_VERSION__ >= 201112L
#define OPENSSL_C11_ATOMIC
#endif

// CRYPTO_REFCOUNT_MAX is the value at which the reference count saturates.
#define CRYPTO_REFCOUNT_MAX 0xffffffff

@ -607,6 +635,7 @@ BSSL_NAMESPACE_END

typedef enum {
OPENSSL_THREAD_LOCAL_ERR = 0,
OPENSSL_THREAD_LOCAL_RAND,
OPENSSL_THREAD_LOCAL_FIPS_COUNTERS,
OPENSSL_THREAD_LOCAL_TEST,
NUM_OPENSSL_THREAD_LOCALS,
} thread_local_data_t;

@ -811,6 +840,97 @@ static inline void *OPENSSL_memset(void *dst, int c, size_t n) {

return memset(dst, c, n);
}

// Loads and stores.
//
// The following functions load and store sized integers with the specified
// endianness. They use |memcpy|, and so avoid alignment or strict aliasing
// requirements on the input and output pointers.

static inline uint32_t CRYPTO_load_u32_le(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}

static inline void CRYPTO_store_u32_le(void *out, uint32_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}

static inline uint32_t CRYPTO_load_u32_be(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return CRYPTO_bswap4(v);
}

static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
v = CRYPTO_bswap4(v);
OPENSSL_memcpy(out, &v, sizeof(v));
}

static inline uint64_t CRYPTO_load_u64_be(const void *ptr) {
uint64_t ret;
OPENSSL_memcpy(&ret, ptr, sizeof(ret));
return CRYPTO_bswap8(ret);
}

static inline void CRYPTO_store_u64_be(void *out, uint64_t v) {
v = CRYPTO_bswap8(v);
OPENSSL_memcpy(out, &v, sizeof(v));
}

static inline crypto_word_t CRYPTO_load_word_le(const void *in) {
crypto_word_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}

static inline void CRYPTO_store_word_le(void *out, crypto_word_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}
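A short sketch of why these helpers exist (the field layout here is hypothetical): reading multi-byte integers out of an untrusted, arbitrarily aligned buffer is undefined behavior with plain pointer casts, but fine through memcpy-based loads.

// Sketch: read a big-endian u32 length then a big-endian u64 sequence number
// from a 12-byte record at any alignment.
static void parse_record(const uint8_t in[12], uint32_t *len, uint64_t *seq) {
  *len = CRYPTO_load_u32_be(in);       // byte-swapped on little-endian targets
  *seq = CRYPTO_load_u64_be(in + 4);
}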

// Bit rotation functions.
//
// Note these functions use |(-shift) & 31|, etc., because shifting by the bit
// width is undefined. Both Clang and GCC recognize this pattern as a rotation,
// but MSVC does not. Instead, we call MSVC's built-in functions.

static inline uint32_t CRYPTO_rotl_u32(uint32_t value, int shift) {
#if defined(_MSC_VER)
return _rotl(value, shift);
#else
return (value << shift) | (value >> ((-shift) & 31));
#endif
}

static inline uint32_t CRYPTO_rotr_u32(uint32_t value, int shift) {
#if defined(_MSC_VER)
return _rotr(value, shift);
#else
return (value >> shift) | (value << ((-shift) & 31));
#endif
}

static inline uint64_t CRYPTO_rotl_u64(uint64_t value, int shift) {
#if defined(_MSC_VER)
return _rotl64(value, shift);
#else
return (value << shift) | (value >> ((-shift) & 63));
#endif
}

static inline uint64_t CRYPTO_rotr_u64(uint64_t value, int shift) {
#if defined(_MSC_VER)
return _rotr64(value, shift);
#else
return (value >> shift) | (value << ((-shift) & 63));
#endif
}
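A usage sketch (SHA-256's Sigma-0 is chosen for illustration; it is not code from this diff). The (-shift) & 31 form matters because the naive value << (32 - shift) would shift by the full bit width at shift = 0, which is undefined.

// Sketch: SHA-256's big Sigma-0, built from fixed-distance rotations that
// compile to single rotate instructions on Clang, GCC, and MSVC.
static uint32_t sha256_Sigma0(uint32_t a) {
  return CRYPTO_rotr_u32(a, 2) ^ CRYPTO_rotr_u32(a, 13) ^
         CRYPTO_rotr_u32(a, 22);
}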

// FIPS functions.

#if defined(BORINGSSL_FIPS)
// BORINGSSL_FIPS_abort is called when a FIPS power-on or continuous test
// fails. It prevents any further cryptographic operations by the current

@ -826,6 +946,11 @@ void BORINGSSL_FIPS_abort(void) __attribute__((noreturn));

int boringssl_fips_self_test(const uint8_t *module_hash,
size_t module_hash_len);

#if defined(BORINGSSL_FIPS_COUNTERS)
void boringssl_fips_inc_counter(enum fips_counter_t counter);
#else
OPENSSL_INLINE void boringssl_fips_inc_counter(enum fips_counter_t counter) {}
#endif

#if defined(__cplusplus)
} // extern C

@ -72,6 +72,8 @@ OPENSSL_MSVC_PRAGMA(warning(pop))

#define OPENSSL_MALLOC_PREFIX 8
OPENSSL_STATIC_ASSERT(OPENSSL_MALLOC_PREFIX >= sizeof(size_t),
"size_t too large");

#if defined(OPENSSL_ASAN)
void __asan_poison_memory_region(const volatile void *addr, size_t size);

@ -101,14 +103,54 @@ static void __asan_unpoison_memory_region(const void *addr, size_t size) {}

// linked. This isn't an ideal result, but its helps in some cases.
WEAK_SYMBOL_FUNC(void, sdallocx, (void *ptr, size_t size, int flags));

// The following two functions are for memory tracking. They are no-ops by
// default but can be overridden at link time if the application needs to
// observe heap operations.
WEAK_SYMBOL_FUNC(void, OPENSSL_track_memory_alloc, (void *ptr, size_t size));
WEAK_SYMBOL_FUNC(void, OPENSSL_track_memory_free, (void *ptr, size_t size));
// The following three functions can be defined to override default heap
// allocation and freeing. If defined, it is the responsibility of
// |OPENSSL_memory_free| to zero out the memory before returning it to the
// system. |OPENSSL_memory_free| will not be passed NULL pointers.
//
// WARNING: These functions are called on every allocation and free in
// BoringSSL across the entire process. They may be called by any code in the
// process which calls BoringSSL, including in process initializers and thread
// destructors. When called, BoringSSL may hold pthreads locks. Any other code
// in the process which, directly or indirectly, calls BoringSSL may be on the
// call stack and may itself be using arbitrary synchronization primitives.
//
// As a result, these functions may not have the usual programming environment
// available to most C or C++ code. In particular, they may not call into
// BoringSSL, or any library which depends on BoringSSL. Any synchronization
// primitives used must tolerate every other synchronization primitive linked
// into the process, including pthreads locks. Failing to meet these constraints
// may result in deadlocks, crashes, or memory corruption.
WEAK_SYMBOL_FUNC(void*, OPENSSL_memory_alloc, (size_t size));
WEAK_SYMBOL_FUNC(void, OPENSSL_memory_free, (void *ptr));
WEAK_SYMBOL_FUNC(size_t, OPENSSL_memory_get_size, (void *ptr));
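Because the three OPENSSL_memory_* symbols are weak, an application can take over BoringSSL's heap by linking strong definitions. A minimal sketch, assuming glibc's malloc_usable_size for the size query (a real override must also honor the constraints in the warning above):

#include <malloc.h>  // malloc_usable_size: a glibc-specific assumption
#include <stdlib.h>
#include <string.h>

void *OPENSSL_memory_alloc(size_t size) { return malloc(size); }

size_t OPENSSL_memory_get_size(void *ptr) { return malloc_usable_size(ptr); }

void OPENSSL_memory_free(void *ptr) {
  // Contract: zero the memory before returning it to the system; |ptr| is
  // never NULL here. Production code should use a zeroing primitive the
  // compiler cannot elide.
  memset(ptr, 0, malloc_usable_size(ptr));
  free(ptr);
}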

// kBoringSSLBinaryTag is a distinctive byte sequence to identify binaries that
// are linking in BoringSSL and, roughly, what version they are using.
static const uint8_t kBoringSSLBinaryTag[18] = {
// 16 bytes of magic tag.
0x8c, 0x62, 0x20, 0x0b, 0xd2, 0xa0, 0x72, 0x58,
0x44, 0xa8, 0x96, 0x69, 0xad, 0x55, 0x7e, 0xec,
// Current source iteration. Incremented ~monthly.
2, 0,
};

void *OPENSSL_malloc(size_t size) {
if (OPENSSL_memory_alloc != NULL) {
assert(OPENSSL_memory_free != NULL);
assert(OPENSSL_memory_get_size != NULL);
return OPENSSL_memory_alloc(size);
}

if (size + OPENSSL_MALLOC_PREFIX < size) {
// |OPENSSL_malloc| is a central function in BoringSSL thus a reference to
// |kBoringSSLBinaryTag| is created here so that the tag isn't discarded by
// the linker. The following is sufficient to stop GCC, Clang, and MSVC
// optimising away the reference at the time of writing. Since this
// probably results in an actual memory reference, it is put in this very
// rare code path.
uint8_t unused = *(volatile uint8_t *)kBoringSSLBinaryTag;
(void) unused;
return NULL;
}

@ -120,9 +162,6 @@ void *OPENSSL_malloc(size_t size) {

*(size_t *)ptr = size;

__asan_poison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);
if (OPENSSL_track_memory_alloc) {
OPENSSL_track_memory_alloc(ptr, size + OPENSSL_MALLOC_PREFIX);
}
return ((uint8_t *)ptr) + OPENSSL_MALLOC_PREFIX;
}

@ -131,13 +170,15 @@ void OPENSSL_free(void *orig_ptr) {

return;
}

if (OPENSSL_memory_free != NULL) {
OPENSSL_memory_free(orig_ptr);
return;
}

void *ptr = ((uint8_t *)orig_ptr) - OPENSSL_MALLOC_PREFIX;
__asan_unpoison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);

size_t size = *(size_t *)ptr;
if (OPENSSL_track_memory_free) {
OPENSSL_track_memory_free(ptr, size + OPENSSL_MALLOC_PREFIX);
}
OPENSSL_cleanse(ptr, size + OPENSSL_MALLOC_PREFIX);
if (sdallocx) {
sdallocx(ptr, size + OPENSSL_MALLOC_PREFIX, 0 /* flags */);

@ -151,10 +192,15 @@ void *OPENSSL_realloc(void *orig_ptr, size_t new_size) {

return OPENSSL_malloc(new_size);
}

void *ptr = ((uint8_t *)orig_ptr) - OPENSSL_MALLOC_PREFIX;
__asan_unpoison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);
size_t old_size = *(size_t *)ptr;
__asan_poison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);
size_t old_size;
if (OPENSSL_memory_get_size != NULL) {
old_size = OPENSSL_memory_get_size(orig_ptr);
} else {
void *ptr = ((uint8_t *)orig_ptr) - OPENSSL_MALLOC_PREFIX;
__asan_unpoison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);
old_size = *(size_t *)ptr;
__asan_poison_memory_region(ptr, OPENSSL_MALLOC_PREFIX);
}

void *ret = OPENSSL_malloc(new_size);
if (ret == NULL) {

@ -219,6 +265,8 @@ uint32_t OPENSSL_hash32(const void *ptr, size_t len) {

return h;
}

uint32_t OPENSSL_strhash(const char *s) { return OPENSSL_hash32(s, strlen(s)); }

size_t OPENSSL_strnlen(const char *s, size_t len) {
for (size_t i = 0; i < len; i++) {
if (s[i] == 0) {

@ -294,22 +342,15 @@ int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) {

}

char *OPENSSL_strndup(const char *str, size_t size) {
char *ret;
size_t alloc_size;

if (str == NULL) {
return NULL;
}

size = OPENSSL_strnlen(str, size);

alloc_size = size + 1;
size_t alloc_size = size + 1;
if (alloc_size < size) {
// overflow
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return NULL;
}
ret = OPENSSL_malloc(alloc_size);
char *ret = OPENSSL_malloc(alloc_size);
if (ret == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return NULL;

@ -357,3 +398,13 @@ void *OPENSSL_memdup(const void *data, size_t size) {

OPENSSL_memcpy(ret, data, size);
return ret;
}

void *CRYPTO_malloc(size_t size, const char *file, int line) {
return OPENSSL_malloc(size);
}

void *CRYPTO_realloc(void *ptr, size_t new_size, const char *file, int line) {
return OPENSSL_realloc(ptr, new_size);
}

void CRYPTO_free(void *ptr, const char *file, int line) { OPENSSL_free(ptr); }
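The CRYPTO_* wrappers at the end exist for OpenSSL source compatibility: OpenSSL's allocator passes file/line debugging arguments, and BoringSSL accepts and ignores them, so OpenSSL-style call sites compile against either library. A call sketch:

// Sketch: the file/line arguments are simply discarded by the wrappers.
static void compat_example(void) {
  void *buf = CRYPTO_malloc(32, __FILE__, __LINE__);
  CRYPTO_free(buf, __FILE__, __LINE__);
}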

@ -49,4 +49,8 @@ void CRYPTO_sysrand(uint8_t *out, size_t requested) {

CRYPTO_chacha_20(out, out, requested, kZeroKey, nonce, 0);
}

void CRYPTO_sysrand_for_seed(uint8_t *out, size_t requested) {
CRYPTO_sysrand(out, requested);
}

#endif // BORINGSSL_UNSAFE_DETERMINISTIC_MODE
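In this build mode, the internal CRYPTO_sysrand is a ChaCha20 keystream under an all-zero key with a deterministic nonce, so every run of a program observes the same "random" bytes, which is what makes fuzzing and differential testing reproducible, and why the mode is marked unsafe for anything else. A usage sketch:

#include <stdint.h>
#include <stdio.h>

// Sketch: in deterministic mode, separate runs of this program print the
// same hex string; in a normal build they almost certainly do not.
static void show_rand(void) {
  uint8_t buf[8];
  CRYPTO_sysrand(buf, sizeof(buf));
  for (size_t i = 0; i < sizeof(buf); i++) {
    printf("%02x", buf[i]);
  }
  printf("\n");
}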